Skip to content

Commit ab4a0ad

Browse files
committed
Rename RequestConfig-style types to DetailConfig and ConfigObject-style types to EnhanceConfig. Rename other configuration properties accordingly. Internal adjustments.
1 parent fd02db0 commit ab4a0ad

File tree

12 files changed

+336
-311
lines changed

12 files changed

+336
-311
lines changed

src/api.ts

Lines changed: 218 additions & 178 deletions
Large diffs are not rendered by default.

src/batchCrawlHandle.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ import { isNumber, isUndefined, log, logNumber, random, sleep } from './utils'
22

33
import type {
44
IntervalTime,
5-
LoaderDataRequestConfig,
6-
LoaderFileRequestConfig,
7-
LoaderPageRequestConfig
5+
LoaderCrawlDataDetail,
6+
LoaderCrawlFileDetail,
7+
LoaderCrawlPageDetail
88
} from './types/api'
99
import type { ControllerConfig } from './controller'
1010

@@ -33,9 +33,9 @@ async function useSleepByBatch(
3333

3434
export async function asyncBatchCrawl<
3535
T extends
36-
| LoaderPageRequestConfig
37-
| LoaderDataRequestConfig
38-
| LoaderFileRequestConfig,
36+
| LoaderCrawlPageDetail
37+
| LoaderCrawlDataDetail
38+
| LoaderCrawlFileDetail,
3939
V,
4040
C
4141
>(
@@ -87,9 +87,9 @@ export async function asyncBatchCrawl<
8787

8888
export async function syncBatchCrawl<
8989
T extends
90-
| LoaderPageRequestConfig
91-
| LoaderDataRequestConfig
92-
| LoaderFileRequestConfig,
90+
| LoaderCrawlPageDetail
91+
| LoaderCrawlDataDetail
92+
| LoaderCrawlFileDetail,
9393
V,
9494
C
9595
>(

src/controller.ts

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,39 @@ import { asyncBatchCrawl, syncBatchCrawl } from './batchCrawlHandle'
22
import { priorityQueueMergeSort } from './sort'
33
import {
44
IntervalTime,
5-
LoaderDataRequestConfig,
6-
LoaderFileRequestConfig,
7-
LoaderPageRequestConfig
5+
LoaderCrawlDataDetail,
6+
LoaderCrawlFileDetail,
7+
LoaderCrawlPageDetail
88
} from './types/api'
99
import { log, logError, logNumber, logSuccess, logWarn } from './utils'
1010

1111
export interface ControllerConfig<
1212
T extends
13-
| LoaderPageRequestConfig
14-
| LoaderDataRequestConfig
15-
| LoaderFileRequestConfig,
13+
| LoaderCrawlPageDetail
14+
| LoaderCrawlDataDetail
15+
| LoaderCrawlFileDetail,
1616
V
1717
> {
1818
id: number
1919
isSuccess: boolean
2020
crawlCount: number
2121
maxRetry: number
2222
errorQueue: Error[]
23-
requestConfig: T
23+
crawlDetailConfig: T
2424
crawlSingleRes: V | null
2525
}
2626

2727
export async function controller<
2828
T extends
29-
| LoaderPageRequestConfig
30-
| LoaderDataRequestConfig
31-
| LoaderFileRequestConfig,
29+
| LoaderCrawlPageDetail
30+
| LoaderCrawlDataDetail
31+
| LoaderCrawlFileDetail,
3232
V,
3333
C
3434
>(
3535
name: 'page' | 'data' | 'file',
3636
mode: 'async' | 'sync',
37-
requestConfigs: T[],
37+
crawlSingleConfigs: T[],
3838
intervalTime: IntervalTime | undefined,
3939
crawlSingleFnExtraConfig: C,
4040
crawlSingleFn: (
@@ -43,27 +43,27 @@ export async function controller<
4343
) => Promise<V>
4444
): Promise<ControllerConfig<T, V>[]> {
4545
// Whether to use priority crawling
46-
const isPriorityCrawl = !requestConfigs.every(
47-
(item) => item.priority === requestConfigs[0].priority
46+
const isPriorityCrawl = !crawlSingleConfigs.every(
47+
(item) => item.priority === crawlSingleConfigs[0].priority
4848
)
4949
const targetRequestConfigs = isPriorityCrawl
5050
? priorityQueueMergeSort(
51-
requestConfigs.map((item) => ({
51+
crawlSingleConfigs.map((item) => ({
5252
...item,
5353
valueOf: () => item.priority
5454
}))
5555
)
56-
: requestConfigs
56+
: crawlSingleConfigs
5757

5858
// Generate a new array of controller configs by mapping
5959
const controllerConfigs: ControllerConfig<T, V>[] = targetRequestConfigs.map(
60-
(requestConfig, index) => ({
60+
(crawlDetailConfig, index) => ({
6161
id: index + 1,
6262
isSuccess: false,
63-
maxRetry: requestConfig.maxRetry,
63+
maxRetry: crawlDetailConfig.maxRetry,
6464
crawlCount: 0,
6565
errorQueue: [],
66-
requestConfig,
66+
crawlDetailConfig,
6767
crawlSingleRes: null
6868
})
6969
)

src/index.ts

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,12 @@ import {
55
startPolling
66
} from './api'
77

8-
import {
9-
LoaderXCrawlBaseConfig,
10-
XCrawlConfig,
11-
XCrawlInstance
12-
} from './types'
8+
import { LoaderXCrawlConfig, XCrawlConfig, XCrawlInstance } from './types'
139
import { isUndefined } from './utils'
1410

1511
function loaderBaseConfig(
1612
baseConfig: XCrawlConfig | undefined
17-
): LoaderXCrawlBaseConfig {
13+
): LoaderXCrawlConfig {
1814
const loaderBaseConfig = baseConfig ? baseConfig : {}
1915

2016
if (!loaderBaseConfig.mode) {
@@ -29,10 +25,10 @@ function loaderBaseConfig(
2925
loaderBaseConfig.maxRetry = 0
3026
}
3127

32-
return loaderBaseConfig as LoaderXCrawlBaseConfig
28+
return loaderBaseConfig as LoaderXCrawlConfig
3329
}
3430

35-
function createnInstance(baseConfig: LoaderXCrawlBaseConfig): XCrawlInstance {
31+
function createnInstance(baseConfig: LoaderXCrawlConfig): XCrawlInstance {
3632
const instance: XCrawlInstance = {
3733
crawlPage: createCrawlPage(baseConfig),
3834
crawlData: createCrawlData(baseConfig),

src/request.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import HttpsProxyAgent from 'https-proxy-agent'
1111
import { isUndefined } from './utils'
1212

1313
import { AnyObject, MapTypeEmptyObject } from './types/common'
14-
import { LoaderDataRequestConfig, LoaderFileRequestConfig } from './types/api'
14+
import { LoaderCrawlDataDetail, LoaderCrawlFileDetail } from './types/api'
1515

1616
/* Type */
1717
export interface Request {
@@ -36,7 +36,7 @@ function parseParams(urlSearch: string, params?: AnyObject): string {
3636
}
3737

3838
function parseHeaders(
39-
rawConfig: LoaderDataRequestConfig & LoaderFileRequestConfig,
39+
rawConfig: LoaderCrawlDataDetail & LoaderCrawlFileDetail,
4040
config: RequestOptions & MapTypeEmptyObject<URL>
4141
) {
4242
const rawHeaders = rawConfig.headers ?? {}
@@ -55,7 +55,7 @@ function parseHeaders(
5555
}
5656

5757
function handleRequestConfig(
58-
rawConfig: LoaderDataRequestConfig & LoaderFileRequestConfig
58+
rawConfig: LoaderCrawlDataDetail & LoaderCrawlFileDetail
5959
): RequestOptions & MapTypeEmptyObject<URL> {
6060
const { protocol, hostname, port, pathname, search } = new Url.URL(
6161
rawConfig.url
@@ -85,9 +85,7 @@ function handleRequestConfig(
8585
return config
8686
}
8787

88-
export function request(
89-
config: LoaderDataRequestConfig & LoaderFileRequestConfig
90-
) {
88+
export function request(config: LoaderCrawlDataDetail & LoaderCrawlFileDetail) {
9189
return new Promise<Request>((resolve, reject) => {
9290
const isDataUndefine = isUndefined(config.data)
9391
config.data = !isDataUndefine ? JSON.stringify(config.data) : config.data

src/types/api.ts

Lines changed: 53 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -10,44 +10,44 @@ type LoaderHasConfig = {
1010
priority: number
1111
}
1212

13-
export type LoaderPageRequestConfig = PageRequestConfig & LoaderHasConfig
13+
export type LoaderCrawlPageDetail = CrawlPageDetailConfig & LoaderHasConfig
1414

15-
export type LoaderDataRequestConfig = DataRequestConfig & LoaderHasConfig
15+
export type LoaderCrawlDataDetail = CrawlDataDetailConfig & LoaderHasConfig
1616

17-
export type LoaderFileRequestConfig = FileRequestConfig & LoaderHasConfig
17+
export type LoaderCrawlFileDetail = CrawlFileDetailConfig & LoaderHasConfig
1818

1919
export interface LoaderCrawlPageConfig
20-
extends MapTypeObject<CrawlPageConfigObject, 'requestConfigs'> {
21-
requestConfigs: LoaderPageRequestConfig[]
20+
extends MapTypeObject<CrawlPageEnhanceConfig, 'crawlPages'> {
21+
crawlPageDetails: LoaderCrawlPageDetail[]
2222
}
2323

2424
export interface LoaderCrawlDataConfig
25-
extends MapTypeObject<CrawlDataConfigObject, 'requestConfigs'> {
26-
requestConfigs: LoaderDataRequestConfig[]
25+
extends MapTypeObject<CrawlDataEnhanceConfig, 'crawlDatas'> {
26+
crawlDataDetails: LoaderCrawlDataDetail[]
2727
}
2828

2929
export interface LoaderCrawlFileConfig
30-
extends MapTypeObject<CrawlFileConfigObject, 'requestConfigs'> {
31-
requestConfigs: LoaderFileRequestConfig[]
30+
extends MapTypeObject<CrawlFileEnhanceConfig, 'crawlFiles'> {
31+
crawlFileDetails: LoaderCrawlFileDetail[]
3232
}
3333

34-
// Function overloading crawl config
35-
export type CrawlPageConfig =
34+
/* Function overloading crawl config */
35+
export type UniteCrawlPageConfig =
3636
| string
37-
| PageRequestConfig
38-
| (string | PageRequestConfig)[]
39-
| CrawlPageConfigObject
37+
| CrawlPageDetailConfig
38+
| (string | CrawlPageDetailConfig)[]
39+
| CrawlPageEnhanceConfig
4040

41-
export type CrawlDataConfig =
41+
export type UniteCrawlDataConfig =
4242
| string
43-
| DataRequestConfig
44-
| (string | DataRequestConfig)[]
45-
| CrawlDataConfigObject
43+
| CrawlDataDetailConfig
44+
| (string | CrawlDataDetailConfig)[]
45+
| CrawlDataEnhanceConfig
4646

47-
export type CrawlFileConfig =
48-
| FileRequestConfig
49-
| FileRequestConfig[]
50-
| CrawlFileConfigObject
47+
export type UniteCrawlFileConfig =
48+
| CrawlFileDetailConfig
49+
| CrawlFileDetailConfig[]
50+
| CrawlFileEnhanceConfig
5151

5252
/* API Config */
5353
// API Config Other
@@ -75,70 +75,64 @@ export type Method =
7575
| 'unlink'
7676
| 'UNLINK'
7777

78-
export type PageRequestConfigCookies =
78+
export type PageCookies =
7979
| string
8080
| Protocol.Network.CookieParam
8181
| Protocol.Network.CookieParam[]
8282

83-
// API Config Request
84-
export interface PageRequestConfig {
85-
url: string
86-
headers?: AnyObject
83+
// API crawl config
84+
// Common
85+
export interface CrawlCommonConfig {
8786
timeout?: number
8887
proxy?: string
89-
cookies?: PageRequestConfigCookies
9088
maxRetry?: number
89+
}
90+
91+
// 1.Crawl page config
92+
export interface CrawlPageDetailConfig extends CrawlCommonConfig {
93+
url: string
94+
headers?: AnyObject
95+
cookies?: PageCookies
9196
priority?: number
9297
}
9398

94-
export interface DataRequestConfig {
99+
export interface CrawlPageEnhanceConfig extends CrawlCommonConfig {
100+
crawlPages: (string | CrawlPageDetailConfig)[]
101+
intervalTime?: IntervalTime
102+
103+
// page common attribute
104+
cookies?: PageCookies
105+
}
106+
107+
// 2.Crawl data config
108+
export interface CrawlDataDetailConfig extends CrawlCommonConfig {
95109
url: string
96110
method?: Method
97111
headers?: AnyObject
98112
params?: AnyObject
99113
data?: any
100-
timeout?: number
101-
proxy?: string
102-
maxRetry?: number
103114
priority?: number
104115
}
105116

106-
export interface FileRequestConfig {
117+
export interface CrawlDataEnhanceConfig extends CrawlCommonConfig {
118+
crawlDatas: (string | CrawlDataDetailConfig)[]
119+
intervalTime?: IntervalTime
120+
}
121+
122+
// 3.Crawl file config
123+
export interface CrawlFileDetailConfig extends CrawlCommonConfig {
107124
url: string
108125
headers?: AnyObject
109-
timeout?: number
110-
proxy?: string
111-
maxRetry?: number
112126
priority?: number
113127
storeDir?: string
114128
fileName?: string
115129
extension?: string
116130
}
117131

118-
// API Config Crawl
119-
export interface CrawlPageConfigObject {
120-
requestConfigs: (string | PageRequestConfig)[]
121-
proxy?: string
122-
timeout?: number
123-
cookies?: PageRequestConfigCookies
124-
intervalTime?: IntervalTime
125-
maxRetry?: number
126-
}
127-
128-
export interface CrawlDataConfigObject {
129-
requestConfigs: (string | DataRequestConfig)[]
130-
proxy?: string
131-
timeout?: number
132+
export interface CrawlFileEnhanceConfig extends CrawlCommonConfig {
133+
crawlFiles: (string | CrawlFileDetailConfig)[]
132134
intervalTime?: IntervalTime
133-
maxRetry?: number
134-
}
135135

136-
export interface CrawlFileConfigObject {
137-
requestConfigs: (string | FileRequestConfig)[]
138-
proxy?: string
139-
timeout?: number
140-
intervalTime?: IntervalTime
141-
maxRetry?: number
142136
fileConfig?: {
143137
storeDir?: string
144138
extension?: string
@@ -151,6 +145,7 @@ export interface CrawlFileConfigObject {
151145
}
152146
}
153147

148+
// 4.Polling config
154149
export interface StartPollingConfig {
155150
d?: number
156151
h?: number

0 commit comments

Comments
 (0)