@@ -13,7 +13,8 @@ import {
13
13
log ,
14
14
logError ,
15
15
logSuccess ,
16
- logWarn
16
+ logWarn ,
17
+ mkdirDirSync
17
18
} from './utils'
18
19
19
20
import {
@@ -146,7 +147,7 @@ function loaderCommonConfig(
146
147
priority = 0
147
148
}
148
149
149
- return { url, timeout, proxy, maxRetry, priority }
150
+ return { ... requestConfig , url, timeout, proxy, maxRetry, priority }
150
151
} )
151
152
152
153
// 2.intervalTime
@@ -258,6 +259,23 @@ function loaderFileConfig(
258
259
// 装载公共配置到 loaderConfig
259
260
loaderCommonConfig ( baseConfig , requestObjecs , loaderConfig )
260
261
262
+ // 装载单独的配置
263
+ loaderConfig . requestConfigs . forEach ( ( requestConfig ) => {
264
+ if (
265
+ isUndefined ( requestConfig . storeDir ) &&
266
+ ! isUndefined ( rawConfig . fileConfig ?. storeDir )
267
+ ) {
268
+ requestConfig . storeDir = rawConfig . fileConfig ! . storeDir
269
+ }
270
+
271
+ if (
272
+ isUndefined ( requestConfig . extension ) &&
273
+ ! isUndefined ( rawConfig . fileConfig ?. extension )
274
+ ) {
275
+ requestConfig . extension = rawConfig . fileConfig ! . extension
276
+ }
277
+ } )
278
+
261
279
return loaderConfig
262
280
}
263
281
@@ -494,10 +512,6 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
494
512
config
495
513
)
496
514
497
- if ( ! fs . existsSync ( fileConfig . storeDir ) ) {
498
- fs . mkdirSync ( fileConfig . storeDir )
499
- }
500
-
501
515
const controllerRes = await controller (
502
516
'file' ,
503
517
baseConfig . mode ,
@@ -517,7 +531,8 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
517
531
maxRetry,
518
532
crawlCount,
519
533
errorQueue,
520
- crawlSingleRes
534
+ crawlSingleRes,
535
+ requestConfig
521
536
} = item
522
537
523
538
const crawlRes : CrawlFileSingleRes = {
@@ -532,14 +547,47 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
532
547
533
548
if ( isSuccess && crawlSingleRes ) {
534
549
const mimeType = crawlSingleRes . headers [ 'content-type' ] ?? ''
535
- const fileExtension = fileConfig . extension ?? mimeType . split ( '/' ) . pop ( )
536
- const fileName = new Date ( ) . getTime ( ) . toString ( )
537
- const filePath = path . resolve (
538
- fileConfig . storeDir ,
539
- `${ fileName } .${ fileExtension } `
540
- )
550
+ let fileName = ''
551
+ let fileExtension = ''
541
552
542
- const saveFileItem = writeFile ( filePath , crawlSingleRes . data )
553
+ if ( ! isUndefined ( requestConfig . fileName ) ) {
554
+ fileName = requestConfig . fileName
555
+ } else {
556
+ fileName = new Date ( ) . getTime ( ) . toString ( )
557
+ }
558
+
559
+ if ( ! isUndefined ( requestConfig . extension ) ) {
560
+ fileExtension = requestConfig . extension
561
+ } else {
562
+ fileExtension = '.' + mimeType . split ( '/' ) . pop ( )
563
+ }
564
+
565
+ if (
566
+ ! isUndefined ( requestConfig . storeDir ) &&
567
+ ! fs . existsSync ( requestConfig . storeDir )
568
+ ) {
569
+ mkdirDirSync ( requestConfig . storeDir )
570
+ }
571
+
572
+ const storePath = requestConfig . storeDir ?? __dirname
573
+ const filePath = path . resolve ( storePath , fileName + fileExtension )
574
+
575
+ // 在保存前的回调
576
+ let data = crawlSingleRes . data
577
+ if ( fileConfig ?. beforeSave ) {
578
+ const newData = fileConfig . beforeSave ( {
579
+ id,
580
+ fileName,
581
+ filePath,
582
+ data
583
+ } )
584
+
585
+ if ( newData ) {
586
+ data = newData
587
+ }
588
+ }
589
+
590
+ const saveFileItem = writeFile ( filePath , data )
543
591
. catch ( ( err ) => {
544
592
const message = `File save error at id ${ id } : ${ err . message } `
545
593
const valueOf = ( ) => id
@@ -551,9 +599,18 @@ export function createCrawlFile(baseConfig: LoaderXCrawlBaseConfig) {
551
599
. then ( ( isError ) => {
552
600
const size = crawlSingleRes . data . length
553
601
const isSuccess = ! isError
554
- const fileInfo = { isSuccess, fileName, mimeType, size, filePath }
555
602
556
- crawlRes . data = { ...crawlSingleRes , data : fileInfo }
603
+ crawlRes . data = {
604
+ ...crawlSingleRes ,
605
+ data : {
606
+ isSuccess,
607
+ fileName,
608
+ fileExtension,
609
+ mimeType,
610
+ size,
611
+ filePath
612
+ }
613
+ }
557
614
558
615
if ( callback ) {
559
616
callback ( crawlRes )
0 commit comments