1
1
import { isNumber , isUndefined , log , logNumber , random , sleep } from './utils'
2
2
3
- import {
4
- LoaderCrawlDataDetail ,
5
- LoaderCrawlFileDetail ,
6
- LoaderCrawlPageDetail
7
- } from './api'
8
-
9
- import type { IntervalTime } from './types/api'
10
- import type { ControllerConfig , CrawlDetail } from './controller'
3
+ import type { ExtraCommonConfig } from './api'
4
+ import type { DetailInfo , CrawlDetail } from './controller'
11
5
12
6
async function useSleepByBatch (
13
7
isHaventervalTime : boolean ,
@@ -32,21 +26,27 @@ async function useSleepByBatch(
32
26
}
33
27
}
34
28
35
- export async function asyncBatchCrawl < T extends CrawlDetail , V , C > (
36
- controllerConfigs : ControllerConfig < T , V > [ ] ,
37
- crawlSingleFnExtra : C ,
38
- intervalTime : IntervalTime | undefined ,
39
- crawlSingleFn : (
40
- controllerConfig : ControllerConfig < T , V > ,
41
- crawlSingleFnExtra : C
42
- ) => Promise < V >
29
+ export async function asyncBatchCrawl <
30
+ T extends CrawlDetail ,
31
+ E extends ExtraCommonConfig ,
32
+ R
33
+ > (
34
+ detailInfos : DetailInfo < T , R > [ ] ,
35
+ extraConfig : E ,
36
+ singleCrawlHandle : (
37
+ detailInfo : DetailInfo < T , R > ,
38
+ extraConfig : E
39
+ ) => Promise < R > ,
40
+ singleResultHandle : ( detailInfo : DetailInfo < T , R > , extraConfig : E ) => void
43
41
) {
42
+ const { intervalTime } = extraConfig
43
+
44
44
const isHaventervalTime = ! isUndefined ( intervalTime )
45
45
const isNumberIntervalTime = isNumber ( intervalTime )
46
46
47
- const crawlQueue : Promise < any > [ ] = [ ]
48
- for ( const controllerConfig of controllerConfigs ) {
49
- const { id } = controllerConfig
47
+ const crawlPendingQueue : Promise < any > [ ] = [ ]
48
+ for ( const detaileInfo of detailInfos ) {
49
+ const { id } = detaileInfo
50
50
51
51
await useSleepByBatch (
52
52
isHaventervalTime ,
@@ -55,41 +55,53 @@ export async function asyncBatchCrawl<T extends CrawlDetail, V, C>(
55
55
id
56
56
)
57
57
58
- controllerConfig . crawlCount ++
59
-
60
- const crawlSingle = crawlSingleFn ( controllerConfig , crawlSingleFnExtra )
58
+ const crawlSinglePending = singleCrawlHandle ( detaileInfo , extraConfig )
61
59
. catch ( ( error ) => {
62
- controllerConfig . errorQueue . push ( error )
60
+ detaileInfo . crawlErrorQueue . push ( error )
63
61
return false
64
62
} )
65
- . then ( ( crawlSingleRes ) => {
66
- if ( crawlSingleRes === false ) return
63
+ . then ( ( detailTargetRes ) => {
64
+ if ( typeof detailTargetRes === 'boolean' ) {
65
+ if ( detaileInfo . retryCount === detaileInfo . maxRetry ) {
66
+ singleResultHandle ( detaileInfo , extraConfig )
67
+ }
68
+
69
+ return
70
+ }
71
+
72
+ detaileInfo . isSuccess = true
73
+ detaileInfo . detailTargetRes = detailTargetRes
67
74
68
- controllerConfig . isSuccess = true
69
- controllerConfig . crawlSingleRes = crawlSingleRes as V
75
+ singleResultHandle ( detaileInfo , extraConfig )
70
76
} )
71
77
72
- crawlQueue . push ( crawlSingle )
78
+ crawlPendingQueue . push ( crawlSinglePending )
73
79
}
74
80
75
81
// 等待所有爬取结束
76
- await Promise . all ( crawlQueue )
82
+ await Promise . all ( crawlPendingQueue )
77
83
}
78
84
79
- export async function syncBatchCrawl < T extends CrawlDetail , V , C > (
80
- controllerConfigs : ControllerConfig < T , V > [ ] ,
81
- crawlSingleFnExtra : C ,
82
- intervalTime : IntervalTime | undefined ,
83
- crawlSingleFn : (
84
- controllerConfig : ControllerConfig < T , V > ,
85
- crawlSingleFnExtra : C
86
- ) => Promise < V >
85
+ export async function syncBatchCrawl <
86
+ T extends CrawlDetail ,
87
+ E extends ExtraCommonConfig ,
88
+ R
89
+ > (
90
+ detailInfos : DetailInfo < T , R > [ ] ,
91
+ extraConfig : E ,
92
+ singleCrawlHandle : (
93
+ detaileInfo : DetailInfo < T , R > ,
94
+ extraConfig : E
95
+ ) => Promise < R > ,
96
+ singleResultHandle : ( detaileInfo : DetailInfo < T , R > , extraConfig : E ) => void
87
97
) {
98
+ const { intervalTime } = extraConfig
99
+
88
100
const isHaventervalTime = ! isUndefined ( intervalTime )
89
101
const isNumberIntervalTime = isNumber ( intervalTime )
90
102
91
- for ( const controllerConfig of controllerConfigs ) {
92
- const { id } = controllerConfig
103
+ for ( const detailInfo of detailInfos ) {
104
+ const { id } = detailInfo
93
105
94
106
await useSleepByBatch (
95
107
isHaventervalTime ,
@@ -98,16 +110,18 @@ export async function syncBatchCrawl<T extends CrawlDetail, V, C>(
98
110
id
99
111
)
100
112
101
- controllerConfig . crawlCount ++
102
-
103
113
try {
104
- controllerConfig . crawlSingleRes = await crawlSingleFn (
105
- controllerConfig ,
106
- crawlSingleFnExtra
114
+ detailInfo . detailTargetRes = await singleCrawlHandle (
115
+ detailInfo ,
116
+ extraConfig
107
117
)
108
- controllerConfig . isSuccess = true
118
+ detailInfo . isSuccess = true
109
119
} catch ( error : any ) {
110
- controllerConfig . errorQueue . push ( error )
120
+ detailInfo . crawlErrorQueue . push ( error )
121
+ }
122
+
123
+ if ( detailInfo . isSuccess || detailInfo . retryCount === detailInfo . maxRetry ) {
124
+ singleResultHandle ( detailInfo , extraConfig )
111
125
}
112
126
}
113
127
}
0 commit comments