@@ -41,20 +41,20 @@ The crawlPage API internally uses the [puppeteer](https://github.com/puppeteer/p
   * [Multiple ways of writing requestConfig options](#Multiple-ways-of-writing-requestConfig-options)
   * [Multiple ways to get results](#Multiple-ways-to-get-results)
 - [API](#API)
-  * [x-crawl](#x-crawl-2)
-    + [Type](#Type-1)
+  * [xCrawl](#xCrawl)
+    + [Type](#Type)
     + [Example](#Example-1)
   * [crawlPage](#crawlPage)
-    + [Type](#Type-2)
+    + [Type](#Type-1)
     + [Example](#Example-2)
   * [crawlData](#crawlData)
-    + [Type](#Type-3)
+    + [Type](#Type-2)
     + [Example](#Example-3)
   * [crawlFile](#crawlFile)
-    + [Type](#Type-4)
+    + [Type](#Type-3)
     + [Example](#Example-4)
   * [crawlPolling](#crawlPolling)
-    + [Type](#Type-5)
+    + [Type](#Type-4)
     + [Example](#Example-5)
 - [Types](#Types)
   * [AnyObject](#AnyObject)
@@ -64,14 +64,14 @@ The crawlPage API internally uses the [puppeteer](https://github.com/puppeteer/p
   * [RequestConfig](#RequestConfig)
   * [IntervalTime](#IntervalTime)
   * [XCrawlBaseConfig](#XCrawlBaseConfig)
-  * [CrawlPageConfig](#CrawlPageConfig)
+  * [CrawlPageConfig](#CrawlPageConfig)
   * [CrawlBaseConfigV1](#CrawlBaseConfigV1)
   * [CrawlDataConfig](#CrawlDataConfig)
   * [CrawlFileConfig](#CrawlFileConfig)
   * [StartPollingConfig](#StartPollingConfig)
   * [CrawlResCommonV1](#CrawlResCommonV1)
   * [CrawlResCommonArrV1](#CrawlResCommonArrV1)
-  * [CrawlPage](#CrawlPage-2)
+  * [CrawlPage](#CrawlPage-1)
   * [FileInfo](#FileInfo)
 - [More](#More)
 
@@ -98,23 +98,25 @@ const myXCrawl = xCrawl({
 })
 
 // 3.Set the crawling task
-// Call the startPolling API to start the polling function, and the callback function will be called every other day
-myXCrawl.startPolling({ d: 1 }, (count, stopPolling) => {
-  myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes').then((res) => {
-    const { jsdom } = res // By default, the JSDOM library is used to parse Page
-
-    // Get the cover image elements for Plus listings
-    const imgEls = jsdom.window.document
-      .querySelector('.a1stauiv')
-      ?.querySelectorAll('picture img')
-
-    // set request configuration
-    const requestConfig: string[] = []
-    imgEls?.forEach((item) => requestConfig.push(item.src))
-
-    // Call the crawlFile API to crawl pictures
-    myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
-  })
+/*
+  Call the startPolling API to start the polling function,
+  and the callback function will be called every other day
+*/
+myXCrawl.startPolling({ d: 1 }, async (count, stopPolling) => {
+  // Call crawlPage API to crawl Page
+  const { jsdom } = await myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes')
+
+  // Get the cover image elements for Plus listings
+  const imgEls = jsdom.window.document
+    .querySelector('.a1stauiv')
+    ?.querySelectorAll('picture img')
+
+  // set request configuration
+  const requestConfig: string[] = []
+  imgEls?.forEach((item) => requestConfig.push(item.src))
+
+  // Call the crawlFile API to crawl pictures
+  myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
 })
 ```
 
@@ -136,7 +138,7 @@ running result:
 
 #### An example of a crawler application
 
-Create a new **application instance** via [xCrawl()](#x-crawl-2):
+Create a new **application instance** via [xCrawl()](#xCrawl):
 
 ```js
 import xCrawl from 'x-crawl'
@@ -321,13 +323,10 @@ const myXCrawl = xCrawl({
   intervalTime: { max: 3000, min: 1000 }
 })
 
-myXCrawl.startPolling({ h: 2, m: 30 }, (count, stopPolling) => {
+myXCrawl.startPolling({ h: 2, m: 30 }, async (count, stopPolling) => {
   // will be executed every two and a half hours
   // crawlPage/crawlData/crawlFile
-  myXCrawl.crawlPage('https://xxx.com').then(res => {
-    const { jsdom, browser, page } = res
-
-  })
+  const { jsdom, browser, page } = await myXCrawl.crawlPage('https://xxx.com')
 })
 ```
 
@@ -476,7 +475,7 @@ It can be selected according to the actual situation.
 
 ## API
 
-### x-crawl
+### xCrawl
 
 Create a crawler instance by calling xCrawl. The request queue is maintained by each instance method itself, not by the instance.
 
@@ -515,7 +514,7 @@ crawlPage is the method of the crawler instance, usually used to crawl page.
 #### Type
 
 - Look at the [CrawlPageConfig](#CrawlPageConfig) type
-- Look at the [CrawlPage](#CrawlPage-2) type
+- Look at the [CrawlPage](#CrawlPage-1) type
 
 ```ts
 function crawlPage: (