Skip to content

Commit fd02db0

Browse files
committed
Add: Support a launchBrowser configuration when creating a crawler application
1 parent c0bcb5f commit fd02db0

File tree

3 files changed

+43
-37
lines changed

3 files changed

+43
-37
lines changed

src/api.ts

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import {
3939
LoaderFileRequestConfig,
4040
CrawlFileConfigObject
4141
} from './types/api'
42-
import { LoaderXCrawlBaseConfig } from './types'
42+
import { LoaderXCrawlConfig } from './types'
4343

4444
async function crawlRequestSingle(
4545
controllerConfig: ControllerConfig<
@@ -98,7 +98,7 @@ function transformRequestConfig(config: any) {
9898
}
9999

100100
function loaderCommonConfig(
101-
baseConfig: LoaderXCrawlBaseConfig,
101+
xCrawlConfig: LoaderXCrawlConfig,
102102
requestObjects: (PageRequestConfig | DataRequestConfig | FileRequestConfig)[],
103103
loaderConfig:
104104
| LoaderCrawlPageConfig
@@ -110,37 +110,37 @@ function loaderCommonConfig(
110110
let { url, timeout, proxy, maxRetry, priority } = requestConfig
111111

112112
// 1.1.baseUrl
113-
if (!isUndefined(baseConfig.baseUrl)) {
114-
url = baseConfig.baseUrl + url
113+
if (!isUndefined(xCrawlConfig.baseUrl)) {
114+
url = xCrawlConfig.baseUrl + url
115115
}
116116

117117
// 1.2.timeout
118-
// requestConfig > loaderConfig > baseConfig
118+
// requestConfig > loaderConfig > xCrawlConfig
119119
if (isUndefined(timeout)) {
120120
if (!isUndefined(loaderConfig.timeout)) {
121121
timeout = loaderConfig.timeout
122122
} else {
123-
timeout = baseConfig.timeout
123+
timeout = xCrawlConfig.timeout
124124
}
125125
}
126126

127127
// 1.3.proxy
128-
// requestConfig > loaderConfig > baseConfig
128+
// requestConfig > loaderConfig > xCrawlConfig
129129
if (isUndefined(proxy)) {
130130
if (!isUndefined(loaderConfig.proxy)) {
131131
proxy = loaderConfig.proxy
132-
} else if (!isUndefined(baseConfig.proxy)) {
133-
proxy = baseConfig.proxy
132+
} else if (!isUndefined(xCrawlConfig.proxy)) {
133+
proxy = xCrawlConfig.proxy
134134
}
135135
}
136136

137137
// 1.4.maxRetry
138-
// requestConfig > loaderConfig > baseConfig
138+
// requestConfig > loaderConfig > xCrawlConfig
139139
if (isUndefined(maxRetry)) {
140140
if (!isUndefined(loaderConfig.maxRetry)) {
141141
maxRetry = loaderConfig.maxRetry
142142
} else {
143-
maxRetry = baseConfig.maxRetry
143+
maxRetry = xCrawlConfig.maxRetry
144144
}
145145
}
146146

@@ -155,14 +155,14 @@ function loaderCommonConfig(
155155
// 2.intervalTime
156156
if (
157157
isUndefined(loaderConfig.intervalTime) &&
158-
!isUndefined(baseConfig.intervalTime)
158+
!isUndefined(xCrawlConfig.intervalTime)
159159
) {
160-
loaderConfig.intervalTime = baseConfig.intervalTime
160+
loaderConfig.intervalTime = xCrawlConfig.intervalTime
161161
}
162162
}
163163

164164
function loaderPageConfig(
165-
baseConfig: LoaderXCrawlBaseConfig,
165+
xCrawlConfig: LoaderXCrawlConfig,
166166
rawConfig: CrawlPageConfig
167167
): LoaderCrawlPageConfig {
168168
const loaderConfig: LoaderCrawlPageConfig = { requestConfigs: [] }
@@ -192,7 +192,7 @@ function loaderPageConfig(
192192
}
193193

194194
// 装载公共配置到 loaderConfig
195-
loaderCommonConfig(baseConfig, requestObjects, loaderConfig)
195+
loaderCommonConfig(xCrawlConfig, requestObjects, loaderConfig)
196196

197197
// 装载单独的配置
198198
if (!isUndefined(loaderConfig.cookies)) {
@@ -211,7 +211,7 @@ function loaderPageConfig(
211211
}
212212

213213
function loaderDataConfig(
214-
baseConfig: LoaderXCrawlBaseConfig,
214+
xCrawlConfig: LoaderXCrawlConfig,
215215
rawConfig: CrawlDataConfig
216216
): LoaderCrawlDataConfig {
217217
const loaderConfig: LoaderCrawlDataConfig = { requestConfigs: [] }
@@ -240,13 +240,13 @@ function loaderDataConfig(
240240
}
241241

242242
// 装载公共配置到 loaderConfig
243-
loaderCommonConfig(baseConfig, requestObjects, loaderConfig)
243+
loaderCommonConfig(xCrawlConfig, requestObjects, loaderConfig)
244244

245245
return loaderConfig
246246
}
247247

248248
function loaderFileConfig(
249-
baseConfig: LoaderXCrawlBaseConfig,
249+
xCrawlConfig: LoaderXCrawlConfig,
250250
rawConfig: CrawlFileConfig
251251
): LoaderCrawlFileConfig {
252252
const loaderConfig: LoaderCrawlFileConfig = { requestConfigs: [] }
@@ -280,7 +280,7 @@ function loaderFileConfig(
280280
}
281281

282282
// 装载公共配置到 loaderConfig
283-
loaderCommonConfig(baseConfig, requestObjects, loaderConfig)
283+
loaderCommonConfig(xCrawlConfig, requestObjects, loaderConfig)
284284

285285
// 装载单独的配置
286286
if (
@@ -307,7 +307,7 @@ function loaderFileConfig(
307307
return loaderConfig
308308
}
309309

310-
export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
310+
export function createCrawlPage(xCrawlConfig: LoaderXCrawlConfig) {
311311
let browser: Browser | null = null
312312
let createBrowserPending: Promise<void> | null = null
313313
let haveCreateBrowser = false
@@ -346,9 +346,11 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
346346
// 创建浏览器
347347
if (!haveCreateBrowser) {
348348
haveCreateBrowser = true
349-
createBrowserPending = puppeteer.launch().then((res) => {
350-
browser = res
351-
})
349+
createBrowserPending = puppeteer
350+
.launch(xCrawlConfig.crawlPage?.launchBrowser)
351+
.then((res) => {
352+
browser = res
353+
})
352354
}
353355

354356
// 等待浏览器创建完毕
@@ -358,15 +360,15 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
358360
if (createBrowserPending) createBrowserPending = null
359361
}
360362

361-
// 合并 baseConfig 配置
363+
// 合并 xCrawlConfig 配置
362364
const { requestConfigs, intervalTime } = loaderPageConfig(
363-
baseConfig,
365+
xCrawlConfig,
364366
config
365367
)
366368

367369
const controllerRes = await controller(
368370
'page',
369-
baseConfig.mode,
371+
xCrawlConfig.mode,
370372
requestConfigs,
371373
intervalTime,
372374
cId,
@@ -482,7 +484,7 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
482484
return crawlPage
483485
}
484486

485-
export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
487+
export function createCrawlData(xCrawlConfig: LoaderXCrawlConfig) {
486488
function crawlData<T = any>(
487489
config: string,
488490
callback?: (res: CrawlDataSingleRes<T>) => void
@@ -508,13 +510,13 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
508510
callback?: (res: CrawlDataSingleRes<T>) => void
509511
): Promise<CrawlDataSingleRes<T> | CrawlDataSingleRes<T>[]> {
510512
const { requestConfigs, intervalTime } = loaderDataConfig(
511-
baseConfig,
513+
xCrawlConfig,
512514
config
513515
)
514516

515517
const controllerRes = await controller(
516518
'data',
517-
baseConfig.mode,
519+
xCrawlConfig.mode,
518520
requestConfigs,
519521
intervalTime,
520522
undefined,
@@ -570,7 +572,7 @@ export function createCrawlData(baseConfig: LoaderXCrawlBaseConfig) {
570572
return crawlData
571573
}
572574

573-
export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
575+
export function createCrawlFile(xCrawlConfig: LoaderXCrawlConfig) {
574576
function crawlFile(
575577
config: FileRequestConfig,
576578
callback?: (res: CrawlFileSingleRes) => void
@@ -591,13 +593,13 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
591593
callback?: (res: CrawlFileSingleRes) => void
592594
): Promise<CrawlFileSingleRes | CrawlFileSingleRes[]> {
593595
const { requestConfigs, intervalTime, fileConfig } = loaderFileConfig(
594-
baseConfig,
596+
xCrawlConfig,
595597
config
596598
)
597599

598600
const controllerRes = await controller(
599601
'file',
600-
baseConfig.mode,
602+
xCrawlConfig.mode,
601603
requestConfigs,
602604
intervalTime,
603605
undefined,

src/index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ import {
77

88
import {
99
LoaderXCrawlBaseConfig,
10-
XCrawlBaseConfig,
10+
XCrawlConfig,
1111
XCrawlInstance
1212
} from './types'
1313
import { isUndefined } from './utils'
1414

1515
function loaderBaseConfig(
16-
baseConfig: XCrawlBaseConfig | undefined
16+
baseConfig: XCrawlConfig | undefined
1717
): LoaderXCrawlBaseConfig {
1818
const loaderBaseConfig = baseConfig ? baseConfig : {}
1919

@@ -43,7 +43,7 @@ function createnInstance(baseConfig: LoaderXCrawlBaseConfig): XCrawlInstance {
4343
return instance
4444
}
4545

46-
export default function xCrawl(baseConfig?: XCrawlBaseConfig): XCrawlInstance {
46+
export default function xCrawl(baseConfig?: XCrawlConfig): XCrawlInstance {
4747
const newBaseConfig = loaderBaseConfig(baseConfig)
4848

4949
const instance = createnInstance(newBaseConfig)

src/types/index.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { PuppeteerLaunchOptions } from 'puppeteer'
12
import {
23
StartPollingConfig,
34
IntervalTime,
@@ -12,16 +13,19 @@ import {
1213
CrawlPageConfigObject
1314
} from './api'
1415

15-
export interface XCrawlBaseConfig {
16+
export interface XCrawlConfig {
1617
baseUrl?: string
1718
timeout?: number
1819
intervalTime?: IntervalTime
1920
mode?: 'async' | 'sync'
2021
proxy?: string
2122
maxRetry?: number
23+
crawlPage?: {
24+
launchBrowser?: PuppeteerLaunchOptions
25+
}
2226
}
2327

24-
export type LoaderXCrawlBaseConfig = XCrawlBaseConfig & {
28+
export type LoaderXCrawlConfig = XCrawlConfig & {
2529
mode: 'async' | 'sync'
2630
timeout: number
2731
maxRetry: number

0 commit comments

Comments
 (0)