Skip to content

Commit 03ebd7d

Browse files
committed
Configuration property name change/internal position adjustment
1 parent 4b30a35 commit 03ebd7d

File tree

8 files changed

+89
-122
lines changed

8 files changed

+89
-122
lines changed

src/api.ts

Lines changed: 53 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -50,25 +50,25 @@ export type LoaderCrawlFileDetail = CrawlFileDetailConfig & LoaderHasConfig
5050

5151
// Create config
5252
interface CrawlPageConfigOriginal {
53-
crawlDetails: CrawlPageDetailConfig[]
53+
detailTargets: CrawlPageDetailConfig[]
5454
intervalTime: IntervalTime | undefined
5555
}
5656

57-
type CrawlPageConfig = Omit<CrawlPageConfigOriginal, 'crawlDetails'> & {
58-
crawlDetails: LoaderCrawlPageDetail[]
57+
type CrawlPageConfig = Omit<CrawlPageConfigOriginal, 'detailTargets'> & {
58+
detailTargets: LoaderCrawlPageDetail[]
5959
}
6060

6161
interface CrawlDataConfigOriginal {
62-
crawlDetails: CrawlDataDetailConfig[]
62+
detailTargets: CrawlDataDetailConfig[]
6363
intervalTime: IntervalTime | undefined
6464
}
6565

66-
type CrawlDataConfig = Omit<CrawlDataConfigOriginal, 'crawlDetails'> & {
67-
crawlDetails: LoaderCrawlDataDetail[]
66+
type CrawlDataConfig = Omit<CrawlDataConfigOriginal, 'detailTargets'> & {
67+
detailTargets: LoaderCrawlDataDetail[]
6868
}
6969

7070
interface CrawlFileConfigOriginal {
71-
crawlDetails: CrawlFileDetailConfig[]
71+
detailTargets: CrawlFileDetailConfig[]
7272
intervalTime: IntervalTime | undefined
7373
onBeforeSaveFile:
7474
| ((info: {
@@ -80,8 +80,8 @@ interface CrawlFileConfigOriginal {
8080
| undefined
8181
}
8282

83-
type CrawlFileConfig = Omit<CrawlFileConfigOriginal, 'crawlDetails'> & {
84-
crawlDetails: LoaderCrawlFileDetail[]
83+
type CrawlFileConfig = Omit<CrawlFileConfigOriginal, 'detailTargets'> & {
84+
detailTargets: LoaderCrawlFileDetail[]
8585
}
8686

8787
// API config
@@ -145,16 +145,16 @@ function parsePageCookies(
145145
return cookiesArr
146146
}
147147

148-
function transformToCrawlDetails(
148+
function transformToDetailTargets(
149149
config: string | CrawlPageDetailConfig | (string | CrawlPageDetailConfig)[]
150150
): CrawlPageDetailConfig[]
151-
function transformToCrawlDetails(
151+
function transformToDetailTargets(
152152
config: string | CrawlDataDetailConfig | (string | CrawlDataDetailConfig)[]
153153
): CrawlDataDetailConfig[]
154-
function transformToCrawlDetails(
154+
function transformToDetailTargets(
155155
config: (string | CrawlFileDetailConfig)[]
156156
): CrawlFileDetailConfig[]
157-
function transformToCrawlDetails(config: any) {
157+
function transformToDetailTargets(config: any) {
158158
return isArray(config)
159159
? config.map((item) => (isObject(item) ? item : { url: item }))
160160
: [isObject(config) ? config : { url: config }]
@@ -171,8 +171,8 @@ function loaderCommonConfig(
171171
| CrawlDataConfigOriginal
172172
| CrawlFileConfigOriginal
173173
) {
174-
// 1.crawlDetails
175-
crawlConfig.crawlDetails.forEach((detail) => {
174+
// 1.detailTargets
175+
crawlConfig.detailTargets.forEach((detail) => {
176176
// detail > advanced > app
177177
const { url, timeout, proxy, maxRetry, priority, headers } = detail
178178

@@ -230,44 +230,41 @@ function loaderCommonConfig(
230230

231231
/* Create Config */
232232
/*
233-
每个创建配置函数的返回值都是类似于对应的进阶版(CrawlAdvancedConfig)配置
233+
每个创建配置函数的返回值都是类似于对应的进阶版(类似 CrawlAdvancedConfig)配置
234234
不同点:
235-
- crawlDetails 里面存放的是详细版(CrawlDetailConfig)配置
235+
- detailTargets 里面存放的是详细版(类似 CrawlDetailConfig)配置
236236
- 不会保留与详细版配置相同的选项
237237
238-
生成 advancedConfig 对象可以对每个详细版配置进行装载, 如果传入进阶版(CrawlAdvancedConfig)配置会覆盖生成的
238+
生成 advancedConfig 对象可以对每个详细版配置进行装载, 如果传入进阶版(类似 CrawlAdvancedConfig)配置会覆盖生成的
239239
*/
240240

241241
function createCrawlPageConfig(
242242
xCrawlConfig: LoaderXCrawlConfig,
243243
originalConfig: UniteCrawlPageConfig
244244
): CrawlPageConfig {
245245
const crawlPageConfig: CrawlPageConfigOriginal = {
246-
crawlDetails: [],
246+
detailTargets: [],
247247
intervalTime: undefined
248248
}
249249

250-
let advancedConfig: CrawlPageAdvancedConfig = {
251-
crawlPages: []
252-
}
250+
let advancedConfig: CrawlPageAdvancedConfig = { targets: [] }
253251

254-
// 将每个 crawls 配置转成 detail 类型的配置
255-
if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'crawlPages')) {
252+
if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'targets')) {
256253
// CrawlPageAdvancedConfig 处理
257-
const { crawlPages } = originalConfig as CrawlPageAdvancedConfig
254+
const { targets } = originalConfig as CrawlPageAdvancedConfig
258255
advancedConfig = originalConfig as CrawlPageAdvancedConfig
259256

260-
crawlPageConfig.crawlDetails.push(...transformToCrawlDetails(crawlPages))
257+
crawlPageConfig.detailTargets.push(...transformToDetailTargets(targets))
261258
} else {
262259
// string | CrawlPageDetailConfig | (string | CrawlPageDetailConfig)[] 处理
263-
const detailes = transformToCrawlDetails(
260+
const detaileTargets = transformToDetailTargets(
264261
originalConfig as
265262
| string
266263
| CrawlPageDetailConfig
267264
| (string | CrawlPageDetailConfig)[]
268265
)
269266

270-
crawlPageConfig.crawlDetails.push(...detailes)
267+
crawlPageConfig.detailTargets.push(...detaileTargets)
271268
}
272269

273270
// 装载公共配置
@@ -276,7 +273,7 @@ function createCrawlPageConfig(
276273
// 装载单独配置
277274
const haveAdvancedCookies = !isUndefined(advancedConfig.cookies)
278275
const haveAdvancedViewport = !isUndefined(advancedConfig.viewport)
279-
crawlPageConfig.crawlDetails.forEach((detail) => {
276+
crawlPageConfig.detailTargets.forEach((detail) => {
280277
// detail > advanced > xCrawl
281278
const { cookies, viewport } = detail
282279

@@ -299,30 +296,28 @@ function createCrawlDataConfig(
299296
originalConfig: UniteCrawlDataConfig
300297
): CrawlDataConfig {
301298
const crawlDataConfig: CrawlDataConfigOriginal = {
302-
crawlDetails: [],
299+
detailTargets: [],
303300
intervalTime: undefined
304301
}
305302

306-
let advancedConfig: CrawlDataAdvancedConfig = {
307-
crawlDatas: []
308-
}
303+
let advancedConfig: CrawlDataAdvancedConfig = { targets: [] }
309304

310-
if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'crawlDatas')) {
305+
if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'targets')) {
311306
// CrawlDataAdvancedConfig 处理
312-
const { crawlDatas } = originalConfig as CrawlDataAdvancedConfig
307+
const { targets } = originalConfig as CrawlDataAdvancedConfig
313308
advancedConfig = originalConfig as CrawlDataAdvancedConfig
314309

315-
crawlDataConfig.crawlDetails.push(...transformToCrawlDetails(crawlDatas))
310+
crawlDataConfig.detailTargets.push(...transformToDetailTargets(targets))
316311
} else {
317312
// string | CrawlDataDetailConfig | (string | CrawlDataDetailConfig)[] 处理
318-
const crawlDatas = transformToCrawlDetails(
313+
const detaileTargets = transformToDetailTargets(
319314
originalConfig as
320315
| string
321316
| CrawlDataDetailConfig
322317
| (string | CrawlDataDetailConfig)[]
323318
)
324319

325-
crawlDataConfig.crawlDetails.push(...crawlDatas)
320+
crawlDataConfig.detailTargets.push(...detaileTargets)
326321
}
327322

328323
loaderCommonConfig(xCrawlConfig, advancedConfig, crawlDataConfig)
@@ -335,36 +330,36 @@ function createCrawlFileConfig(
335330
originalConfig: UniteCrawlFileConfig
336331
): CrawlFileConfig {
337332
const crawlFileConfig: CrawlFileConfigOriginal = {
338-
crawlDetails: [],
333+
detailTargets: [],
339334
intervalTime: undefined,
340335
onBeforeSaveFile: undefined
341336
}
342337

343-
let advancedConfig: CrawlFileAdvancedConfig = { crawlFiles: [] }
338+
let advancedConfig: CrawlFileAdvancedConfig = { targets: [] }
344339

345-
if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'crawlFiles')) {
340+
if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'targets')) {
346341
// CrawlFileAdvancedConfig 处理
347-
const { crawlFiles } = originalConfig as CrawlFileAdvancedConfig
342+
const { targets } = originalConfig as CrawlFileAdvancedConfig
348343

349344
advancedConfig = originalConfig as CrawlFileAdvancedConfig
350-
crawlFileConfig.crawlDetails.push(...transformToCrawlDetails(crawlFiles))
345+
crawlFileConfig.detailTargets.push(...transformToDetailTargets(targets))
351346
} else {
352347
// string | CrawlFileDetailConfig | (string | CrawlFileDetailConfig)[] 处理
353-
const crawlFiles = transformToCrawlDetails(
348+
const detailTargets = transformToDetailTargets(
354349
originalConfig as
355350
| string
356351
| CrawlFileDetailConfig
357352
| (string | CrawlFileDetailConfig)[]
358353
)
359354

360-
crawlFileConfig.crawlDetails.push(...crawlFiles)
355+
crawlFileConfig.detailTargets.push(...detailTargets)
361356
}
362357

363358
loaderCommonConfig(xCrawlConfig, advancedConfig, crawlFileConfig)
364359

365360
const haveAdvancedStoreDir = !isUndefined(advancedConfig?.storeDir)
366361
const haveAdvancedExtension = !isUndefined(advancedConfig?.extension)
367-
crawlFileConfig.crawlDetails.forEach((detail) => {
362+
crawlFileConfig.detailTargets.forEach((detail) => {
368363
// 1.storeDir
369364
if (isUndefined(detail.storeDir) && haveAdvancedStoreDir) {
370365
detail.storeDir = advancedConfig!.storeDir
@@ -437,17 +432,17 @@ export function createCrawlPage(xCrawlConfig: LoaderXCrawlConfig) {
437432
}
438433

439434
// 创建新配置
440-
const { crawlDetails, intervalTime } = createCrawlPageConfig(
435+
const { detailTargets, intervalTime } = createCrawlPageConfig(
441436
xCrawlConfig,
442437
config
443438
)
444439

445440
const controllerRes = await controller(
446441
'page',
447442
xCrawlConfig.mode,
448-
crawlDetails,
449-
intervalTime,
443+
detailTargets,
450444
cId,
445+
intervalTime,
451446
crawlPageSingle
452447
)
453448

@@ -491,8 +486,7 @@ export function createCrawlPage(xCrawlConfig: LoaderXCrawlConfig) {
491486
errorPageContainer.delete(cId)
492487

493488
const crawlRes =
494-
isArray(config) ||
495-
(isObject(config) && Object.hasOwn(config, 'crawlPages'))
489+
isArray(config) || (isObject(config) && Object.hasOwn(config, 'targets'))
496490
? crawlResArr
497491
: crawlResArr[0]
498492

@@ -588,17 +582,17 @@ export function createCrawlData(xCrawlConfig: LoaderXCrawlConfig) {
588582
config: UniteCrawlDataConfig,
589583
callback?: (res: any) => void
590584
): Promise<CrawlDataSingleRes<T> | CrawlDataSingleRes<T>[]> {
591-
const { crawlDetails, intervalTime } = createCrawlDataConfig(
585+
const { detailTargets, intervalTime } = createCrawlDataConfig(
592586
xCrawlConfig,
593587
config
594588
)
595589

596590
const controllerRes = await controller(
597591
'data',
598592
xCrawlConfig.mode,
599-
crawlDetails,
600-
intervalTime,
593+
detailTargets,
601594
undefined,
595+
intervalTime,
602596
crawlRequestSingle
603597
)
604598

@@ -636,8 +630,7 @@ export function createCrawlData(xCrawlConfig: LoaderXCrawlConfig) {
636630
})
637631

638632
const crawlRes =
639-
isArray(config) ||
640-
(isObject(config) && Object.hasOwn(config, 'crawlDatas'))
633+
isArray(config) || (isObject(config) && Object.hasOwn(config, 'targets'))
641634
? crawlResArr
642635
: crawlResArr[0]
643636

@@ -671,15 +664,15 @@ export function createCrawlFile(xCrawlConfig: LoaderXCrawlConfig) {
671664
config: UniteCrawlFileConfig,
672665
callback?: (res: any) => void
673666
): Promise<CrawlFileSingleRes | CrawlFileSingleRes[]> {
674-
const { crawlDetails, intervalTime, onBeforeSaveFile } =
667+
const { detailTargets, intervalTime, onBeforeSaveFile } =
675668
createCrawlFileConfig(xCrawlConfig, config)
676669

677670
const controllerRes = await controller(
678671
'file',
679672
xCrawlConfig.mode,
680-
crawlDetails,
681-
intervalTime,
673+
detailTargets,
682674
undefined,
675+
intervalTime,
683676
crawlRequestSingle
684677
)
685678

@@ -803,8 +796,7 @@ export function createCrawlFile(xCrawlConfig: LoaderXCrawlConfig) {
803796
)
804797

805798
const crawlRes =
806-
isArray(config) ||
807-
(isObject(config) && Object.hasOwn(config, 'crawlFiles'))
799+
isArray(config) || (isObject(config) && Object.hasOwn(config, 'targets'))
808800
? crawlResArr
809801
: crawlResArr[0]
810802

src/batchCrawlHandle.ts

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import {
77
} from './api'
88

99
import type { IntervalTime } from './types/api'
10-
import type { ControllerConfig } from './controller'
10+
import type { ControllerConfig, CrawlDetail } from './controller'
1111

1212
async function useSleepByBatch(
1313
isHaventervalTime: boolean,
@@ -32,20 +32,13 @@ async function useSleepByBatch(
3232
}
3333
}
3434

35-
export async function asyncBatchCrawl<
36-
T extends
37-
| LoaderCrawlPageDetail
38-
| LoaderCrawlDataDetail
39-
| LoaderCrawlFileDetail,
40-
V,
41-
C
42-
>(
35+
export async function asyncBatchCrawl<T extends CrawlDetail, V, C>(
4336
controllerConfigs: ControllerConfig<T, V>[],
37+
crawlSingleFnExtra: C,
4438
intervalTime: IntervalTime | undefined,
45-
crawlSingleFnExtraConfig: C,
4639
crawlSingleFn: (
4740
controllerConfig: ControllerConfig<T, V>,
48-
crawlSingleFnExtraConfig: C
41+
crawlSingleFnExtra: C
4942
) => Promise<V>
5043
) {
5144
const isHaventervalTime = !isUndefined(intervalTime)
@@ -64,10 +57,7 @@ export async function asyncBatchCrawl<
6457

6558
controllerConfig.crawlCount++
6659

67-
const crawlSingle = crawlSingleFn(
68-
controllerConfig,
69-
crawlSingleFnExtraConfig
70-
)
60+
const crawlSingle = crawlSingleFn(controllerConfig, crawlSingleFnExtra)
7161
.catch((error) => {
7262
controllerConfig.errorQueue.push(error)
7363
return false
@@ -86,20 +76,13 @@ export async function asyncBatchCrawl<
8676
await Promise.all(crawlQueue)
8777
}
8878

89-
export async function syncBatchCrawl<
90-
T extends
91-
| LoaderCrawlPageDetail
92-
| LoaderCrawlDataDetail
93-
| LoaderCrawlFileDetail,
94-
V,
95-
C
96-
>(
79+
export async function syncBatchCrawl<T extends CrawlDetail, V, C>(
9780
controllerConfigs: ControllerConfig<T, V>[],
81+
crawlSingleFnExtra: C,
9882
intervalTime: IntervalTime | undefined,
99-
crawlSingleFnExtraConfig: C,
10083
crawlSingleFn: (
10184
controllerConfig: ControllerConfig<T, V>,
102-
crawlSingleFnExtraConfig: C
85+
crawlSingleFnExtra: C
10386
) => Promise<V>
10487
) {
10588
const isHaventervalTime = !isUndefined(intervalTime)
@@ -120,7 +103,7 @@ export async function syncBatchCrawl<
120103
try {
121104
controllerConfig.crawlSingleRes = await crawlSingleFn(
122105
controllerConfig,
123-
crawlSingleFnExtraConfig
106+
crawlSingleFnExtra
124107
)
125108
controllerConfig.isSuccess = true
126109
} catch (error: any) {

0 commit comments

Comments
 (0)