@@ -82,16 +82,16 @@ function parsePageCookies(
82
82
return cookiesArr
83
83
}
84
84
85
- function transformToCrawlObjects (
85
+ function transformToCrawlDetails (
86
86
config : string | CrawlPageDetailConfig | ( string | CrawlPageDetailConfig ) [ ]
87
87
) : CrawlPageDetailConfig [ ]
88
- function transformToCrawlObjects (
88
+ function transformToCrawlDetails (
89
89
config : string | CrawlDataDetailConfig | ( string | CrawlDataDetailConfig ) [ ]
90
90
) : CrawlDataDetailConfig [ ]
91
- function transformToCrawlObjects (
91
+ function transformToCrawlDetails (
92
92
config : ( string | CrawlFileDetailConfig ) [ ]
93
93
) : CrawlFileDetailConfig [ ]
94
- function transformToCrawlObjects ( config : any ) {
94
+ function transformToCrawlDetails ( config : any ) {
95
95
return isArray ( config )
96
96
? config . map ( ( item ) => ( isObject ( item ) ? item : { url : item } ) )
97
97
: [ isObject ( config ) ? config : { url : config } ]
@@ -117,7 +117,7 @@ function loaderCommonConfig(
117
117
// 1.rawCrawlDetails
118
118
rawCrawlDetails . forEach ( ( detail ) => {
119
119
// detail > API > xCrawl
120
- let { url, timeout, proxy, maxRetry, priority } = detail
120
+ let { url, timeout, proxy, maxRetry, priority, headers } = detail
121
121
122
122
// 1.1.baseUrl
123
123
if ( ! isUndefined ( xCrawlConfig . baseUrl ) ) {
@@ -156,13 +156,19 @@ function loaderCommonConfig(
156
156
priority = 0
157
157
}
158
158
159
+ // 1.6.headers
160
+ if ( isUndefined ( headers ) ) {
161
+ headers = crawlAPIConfig . headers
162
+ }
163
+
159
164
crawlDetails . push ( {
160
165
...detail ,
161
166
url,
162
167
timeout,
163
168
proxy,
164
169
maxRetry,
165
- priority
170
+ priority,
171
+ headers
166
172
} )
167
173
} )
168
174
@@ -187,17 +193,17 @@ function loaderPageConfig(
187
193
// Normalize everything to the CrawlPageDetailConfig type
188
194
if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'crawlPages' ) ) {
189
195
// Handle CrawlPageEnhanceConfig
190
- const { crawlPages, proxy, timeout, cookies, intervalTime, maxRetry } =
191
- rawConfig as CrawlPageEnhanceConfig
196
+ const { crawlPages } = rawConfig as CrawlPageEnhanceConfig
192
197
193
198
// Copy every CrawlPageEnhanceConfig option (except crawlPages) onto crawlPageConfig
194
- crawlPageConfig . proxy = proxy
195
- crawlPageConfig . cookies = cookies
196
- crawlPageConfig . intervalTime = intervalTime
197
- crawlPageConfig . maxRetry = maxRetry
198
- crawlPageConfig . timeout = timeout
199
+ const rawConfigMap : any = rawConfig
200
+ const crawlPageConfigMap : any = crawlPageConfig
201
+ Object . keys ( rawConfig as CrawlPageEnhanceConfig ) . forEach ( ( key ) => {
202
+ if ( key === 'crawlPages' ) return
203
+ crawlPageConfigMap [ key ] = rawConfigMap [ key ]
204
+ } )
199
205
200
- rawCrawlPageDetails . push ( ...transformToCrawlObjects ( crawlPages ) )
206
+ rawCrawlPageDetails . push ( ...transformToCrawlDetails ( crawlPages ) )
201
207
} else {
202
208
// Handle string | CrawlPageDetailConfig | (string | CrawlPageDetailConfig)[]
203
209
const transformRes = transformToCrawlObjects (
@@ -246,26 +252,27 @@ function loaderDataConfig(
246
252
// Normalize everything to the CrawlDataDetailConfig type
247
253
if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'crawlDatas' ) ) {
248
254
// Handle CrawlDataEnhanceConfig
249
- const { crawlDatas, proxy, timeout, intervalTime, maxRetry } =
250
- rawConfig as CrawlDataEnhanceConfig
255
+ const { crawlDatas } = rawConfig as CrawlDataEnhanceConfig
251
256
252
257
// Copy every CrawlDataEnhanceConfig option (except crawlDatas) onto crawlDataConfig
253
- crawlDataConfig . proxy = proxy
254
- crawlDataConfig . intervalTime = intervalTime
255
- crawlDataConfig . maxRetry = maxRetry
256
- crawlDataConfig . timeout = timeout
258
+ const rawConfigMap : any = rawConfig
259
+ const crawlDataConfigMap : any = crawlDataConfig
260
+ Object . keys ( rawConfig as CrawlDataEnhanceConfig ) . forEach ( ( key ) => {
261
+ if ( key === 'crawlDatas' ) return
262
+ crawlDataConfigMap [ key ] = rawConfigMap [ key ]
263
+ } )
257
264
258
- rawCrawlDataDetails . push ( ...transformToCrawlObjects ( crawlDatas ) )
265
+ rawCrawlDataDetails . push ( ...transformToCrawlDetails ( crawlDatas ) )
259
266
} else {
260
267
// Handle string | CrawlDataDetailConfig | (string | CrawlDataDetailConfig)[]
261
- const transformRes = transformToCrawlObjects (
268
+ const transformRes = transformToCrawlDetails (
262
269
rawConfig as
263
270
| string
264
271
| CrawlDataDetailConfig
265
272
| ( string | CrawlDataDetailConfig ) [ ]
266
273
)
267
274
268
- rawCrawlDataDetails . push ( ...transformToCrawlObjects ( transformRes ) )
275
+ rawCrawlDataDetails . push ( ...transformToCrawlDetails ( transformRes ) )
269
276
}
270
277
271
278
// Load the common config
@@ -291,17 +298,17 @@ function loaderFileConfig(
291
298
// Normalize everything to the CrawlFileDetailConfig type
292
299
if ( isObject ( rawConfig ) && Object . hasOwn ( rawConfig , 'crawlFiles' ) ) {
293
300
// Handle CrawlFileEnhanceConfig
294
- const { crawlFiles, proxy, timeout, intervalTime, maxRetry, fileConfig } =
295
- rawConfig as CrawlFileEnhanceConfig
301
+ const { crawlFiles } = rawConfig as CrawlFileEnhanceConfig
296
302
297
303
// Copy every CrawlFileEnhanceConfig option (except crawlFiles) onto crawlFileConfig
298
- crawlFileConfig . proxy = proxy
299
- crawlFileConfig . intervalTime = intervalTime
300
- crawlFileConfig . maxRetry = maxRetry
301
- crawlFileConfig . timeout = timeout
302
- crawlFileConfig . fileConfig = fileConfig
304
+ const rawConfigMap : any = rawConfig
305
+ const crawlFileConfigMap : any = crawlFileConfig
306
+ Object . keys ( rawConfig as CrawlFileEnhanceConfig ) . forEach ( ( key ) => {
307
+ if ( key === 'crawlFiles' ) return
308
+ crawlFileConfigMap [ key ] = rawConfigMap [ key ]
309
+ } )
303
310
304
- rawCrawlFileDetails . push ( ...transformToCrawlObjects ( crawlFiles ) )
311
+ rawCrawlFileDetails . push ( ...transformToCrawlDetails ( crawlFiles ) )
305
312
} else {
306
313
// Handle CrawlFileDetailConfig | CrawlFileDetailConfig[]
307
314
rawCrawlFileDetails . push (
@@ -317,26 +324,26 @@ function loaderFileConfig(
317
324
crawlFileConfig . crawlFileDetails
318
325
)
319
326
320
- // 装载单独的配置
327
+ // Load the file-specific config (storeDir / extension)
321
328
if (
322
- ! isUndefined ( crawlFileConfig . fileConfig ?. storeDir ) ||
323
- ! isUndefined ( crawlFileConfig . fileConfig ?. extension )
329
+ ! isUndefined ( crawlFileConfig ?. storeDir ) ||
330
+ ! isUndefined ( crawlFileConfig ?. extension )
324
331
) {
325
332
crawlFileConfig . crawlFileDetails . forEach ( ( fileSingleConfig ) => {
326
333
// 1.storeDir
327
334
if (
328
335
isUndefined ( fileSingleConfig . storeDir ) &&
329
- ! isUndefined ( crawlFileConfig . fileConfig ?. storeDir )
336
+ ! isUndefined ( crawlFileConfig ?. storeDir )
330
337
) {
331
- fileSingleConfig . storeDir = crawlFileConfig . fileConfig ! . storeDir
338
+ fileSingleConfig . storeDir = crawlFileConfig ! . storeDir
332
339
}
333
340
334
341
// 2.extension
335
342
if (
336
343
isUndefined ( fileSingleConfig . extension ) &&
337
- ! isUndefined ( crawlFileConfig . fileConfig ?. extension )
344
+ ! isUndefined ( crawlFileConfig ?. extension )
338
345
) {
339
- fileSingleConfig . extension = crawlFileConfig . fileConfig ! . extension
346
+ fileSingleConfig . extension = crawlFileConfig ! . extension
340
347
}
341
348
} )
342
349
}
@@ -632,10 +639,8 @@ export function createCrawlFile(xCrawlConfig: LoaderXCrawlConfig) {
632
639
config : UniteCrawlFileConfig ,
633
640
callback ?: ( res : CrawlFileSingleRes ) => void
634
641
) : Promise < CrawlFileSingleRes | CrawlFileSingleRes [ ] > {
635
- const { crawlFileDetails, intervalTime, fileConfig } = loaderFileConfig (
636
- xCrawlConfig ,
637
- config
638
- )
642
+ const { crawlFileDetails, intervalTime, onBeforeSaveFile } =
643
+ loaderFileConfig ( xCrawlConfig , config )
639
644
640
645
const controllerRes = await controller (
641
646
'file' ,
@@ -691,8 +696,8 @@ export function createCrawlFile(xCrawlConfig: LoaderXCrawlConfig) {
691
696
// Callback invoked before the file is saved
692
697
const data = crawlSingleRes . data
693
698
let dataPromise = Promise . resolve ( data )
694
- if ( fileConfig ?. beforeSave ) {
695
- dataPromise = fileConfig . beforeSave ( {
699
+ if ( onBeforeSaveFile ) {
700
+ dataPromise = onBeforeSaveFile ( {
696
701
id,
697
702
fileName,
698
703
filePath,
0 commit comments