@@ -30,10 +30,14 @@ import {
30
30
CrawlFileSingleRes ,
31
31
CrawlFileAdvancedConfig ,
32
32
CrawlDataAdvancedConfig ,
33
- IntervalTime
33
+ IntervalTime ,
34
+ DetailTargetFingerprintCommon ,
35
+ Platform ,
36
+ Mobile
34
37
} from './types/api'
35
38
import { LoaderXCrawlConfig } from './types'
36
39
import { AnyObject } from './types/common'
40
+ import { randomFingerprint } from './default'
37
41
38
42
/* Types */
39
43
@@ -212,7 +216,78 @@ function transformToDetailTargets(config: any) {
212
216
: [ isObject ( config ) ? config : { url : config } ]
213
217
}
214
218
215
- function loaderCommonConfig (
219
/**
 * Writes the shared (non page-specific) fingerprint values onto the request
 * headers of a detail target.
 *
 * `detail.headers` is created when it is missing and is mutated in place.
 * Fingerprint fields that are undefined (or otherwise falsy) are skipped;
 * every other field overwrites any header of the same name already present.
 *
 * @param detail - Detail target whose `headers` receive the fingerprint.
 * @param fingerprint - Fingerprint values to translate into headers.
 */
function loaderCommonFingerprintToDetailTarget(
  detail:
    | CrawlPageDetailTargetConfig
    | CrawlDataDetailTargetConfig
    | CrawlFileDetailTargetConfig,
  fingerprint: DetailTargetFingerprintCommon
) {
  const { userAgent, ua, platform, platformVersion, mobile, acceptLanguage } =
    fingerprint

  let headers = detail.headers

  if (!headers) {
    detail.headers = headers = {}
  }

  // Header name -> fingerprint value, listed in the order they are written.
  // The client-hint names follow the User-Agent Client Hints naming
  // (`sec-ch-ua-*`): the platform and mobile hints are `sec-ch-ua-platform`
  // and `sec-ch-ua-mobile`, not `sec-ch-platform` / `sec-ch-mobile`.
  const fingerprintHeaders = {
    'user-agent': userAgent,
    'sec-ch-ua': ua,
    'sec-ch-ua-platform': platform,
    'sec-ch-ua-platform-version': platformVersion,
    'sec-ch-ua-mobile': mobile,
    'accept-language': acceptLanguage
  }

  for (const [name, value] of Object.entries(fingerprintHeaders)) {
    // Skip unset values so existing headers are not clobbered with undefined
    if (value) {
      headers[name] = value
    }
  }
}
265
+
266
/**
 * Applies a page fingerprint's viewport size to a page detail target.
 *
 * For each dimension the max bound is used directly when it is strictly
 * equal to the min bound; otherwise the value comes from
 * `random(max, min)`. An existing `detail.viewport` object is updated in
 * place, and a new `{ width, height }` object is assigned when there is none.
 *
 * Note: the height's lower bound key is spelled `minHidth` in the parameter
 * type, so that spelling is what callers have to pass.
 *
 * @param detail - Page detail target whose `viewport` is set.
 * @param fingerprint - Bounds for the viewport width and height.
 */
function loaderPageFingerprintToDetailTarget(
  detail: CrawlPageDetailTargetConfig,
  fingerprint: {
    maxWidth: number
    minWidth?: number
    maxHeight: number
    minHidth?: number
  }
) {
  // Fixed size when both bounds coincide, otherwise delegate to `random`
  const pickDimension = (upper: number, lower?: number) =>
    upper === lower ? upper : random(upper, lower)

  // Width is resolved before height, so `random` is consulted in that order
  const width = pickDimension(fingerprint.maxWidth, fingerprint.minWidth)
  const height = pickDimension(fingerprint.maxHeight, fingerprint.minHidth)

  const current = detail.viewport
  if (current) {
    // Keep the caller's viewport object (and any other fields on it)
    current.width = width
    current.height = height
    return
  }

  detail.viewport = { width, height }
}
289
+
290
+ function loaderCommonConfigToCrawlConfig (
216
291
xCrawlConfig : LoaderXCrawlConfig ,
217
292
advancedConfig :
218
293
| CrawlPageAdvancedConfig
@@ -267,77 +342,82 @@ function loaderCommonConfig(
267
342
}
268
343
269
344
// 1.6.header
270
- if ( isUndefined ( headers ) ) {
271
- detail . headers = advancedConfig . headers
345
+ if ( isUndefined ( headers ) && advancedConfig . headers ) {
346
+ detail . headers = { ... advancedConfig . headers }
272
347
}
273
348
274
349
// 1.7.fingerprint(公共部分)
275
350
if ( fingerprint ) {
276
- const { userAgent, ua, platform, mobile, acceptLanguage } = fingerprint
277
- let headers = detail . headers
351
+ // detailTarget
278
352
279
- if ( ! headers ) {
280
- detail . headers = headers = { }
281
- }
353
+ loaderCommonFingerprintToDetailTarget ( detail , fingerprint )
354
+ } else if ( isUndefined ( fingerprint ) && advancedConfig . fingerprint ) {
355
+ // advancedConfig
356
+
357
+ const {
358
+ userAgents,
359
+ uas,
360
+ platforms,
361
+ platformVersions,
362
+ mobiles,
363
+ acceptLanguages
364
+ } = advancedConfig . fingerprint
282
365
283
366
// 1.user-agent
284
- if ( userAgent ) {
285
- headers [ 'user-agent' ] = userAgent
286
- }
367
+ const userAgent = userAgents
368
+ ? userAgents [ random ( userAgents . length ) ]
369
+ : undefined
287
370
288
371
// 2.sec-ch-ua
289
- if ( ua ) {
290
- headers [ 'sec-ch-ua' ] = ua
291
- }
372
+ const ua = uas ? uas [ random ( uas . length ) ] : undefined
292
373
293
374
// 3.sec-ch-platform
294
- if ( platform ) {
295
- headers [ 'sec-ch-platform' ] = platform
296
- }
297
-
298
- // 4.sec-ch-mobile
299
- if ( mobile ) {
300
- headers [ 'sec-ch-mobile' ] = mobile
301
- }
302
-
303
- // 4.accept-language
304
- if ( acceptLanguage ) {
305
- headers [ 'accept-language' ] = acceptLanguage
306
- }
307
- } else if ( isUndefined ( fingerprint ) && advancedConfig . fingerprint ) {
308
- const { userAgents, uas, platforms, mobiles, acceptLanguages } =
309
- advancedConfig . fingerprint
310
- let headers = detail . headers
375
+ const platform = platforms
376
+ ? platforms [ random ( platforms . length ) ]
377
+ : undefined
378
+
379
+ // 4.sec-ch-platform-version
380
+ const platformVersion = platformVersions
381
+ ? platformVersions [ random ( platformVersions . length ) ]
382
+ : undefined
383
+
384
+ // 5.sec-ch-mobile
385
+ const mobile = mobiles ? mobiles [ random ( mobiles . length ) ] : undefined
386
+
387
+ // 6.accept-language
388
+ const acceptLanguage = acceptLanguages
389
+ ? acceptLanguages [ random ( acceptLanguages . length ) ]
390
+ : undefined
391
+
392
+ loaderCommonFingerprintToDetailTarget ( detail , {
393
+ userAgent,
394
+ ua,
395
+ platform,
396
+ platformVersion,
397
+ mobile,
398
+ acceptLanguage
399
+ } )
400
+ } else if ( xCrawlConfig . enableRandomFingerprint ) {
401
+ // xCrawlConfig
311
402
312
- if ( ! headers ) {
313
- detail . headers = headers = { }
314
- }
403
+ const { platforms, mobiles } = randomFingerprint
315
404
316
405
// 1.user-agent
317
- if ( userAgents ) {
318
- headers [ 'user-agent' ] = userAgents [ random ( userAgents . length ) ]
319
- }
406
+ const userAgent = `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112. ${ random (
407
+ 10
408
+ ) } . ${ random ( 10000 ) } . ${ random ( 1000 ) } Safari/537.36`
320
409
321
- // 2.sec-ch-ua
322
- if ( uas ) {
323
- headers [ 'sec-ch-ua' ] = uas [ random ( uas . length ) ]
324
- }
410
+ // 2.sec-ch-platform
411
+ const platform = platforms [ random ( platforms . length ) ] as Platform
325
412
326
- // 3.sec-ch-platform
327
- if ( platforms ) {
328
- headers [ 'sec-ch-platform' ] = platforms [ random ( platforms . length ) ]
329
- }
413
+ // 3.sec-ch-mobile
414
+ const mobile = mobiles [ random ( mobiles . length ) ] as Mobile
330
415
331
- // 4.sec-ch-mobile
332
- if ( mobiles ) {
333
- headers [ 'sec-ch-mobile' ] = mobiles [ random ( mobiles . length ) ]
334
- }
335
-
336
- // 4.accept-language
337
- if ( acceptLanguages ) {
338
- headers [ 'accept-language' ] =
339
- acceptLanguages [ random ( acceptLanguages . length ) ]
340
- }
416
+ loaderCommonFingerprintToDetailTarget ( detail , {
417
+ userAgent,
418
+ platform,
419
+ mobile
420
+ } )
341
421
}
342
422
} )
343
423
@@ -354,30 +434,6 @@ function loaderCommonConfig(
354
434
crawlConfig . onCrawlItemComplete = advancedConfig . onCrawlItemComplete
355
435
}
356
436
357
/**
 * Sets the viewport of a page detail target from a page fingerprint.
 *
 * Width and height are each taken as the max bound when it strictly equals
 * the min bound, and from `random(max, min)` otherwise. If the target
 * already has a `viewport` object, its `width` and `height` are overwritten
 * in place; if not, a fresh `{ width, height }` object is assigned.
 *
 * Note: the lower bound for the height is read from the key `minHidth`, as
 * declared in the parameter type.
 *
 * @param detail - Page detail target to update.
 * @param fingerprint - Width and height bounds for the viewport.
 */
function loaderPageDetailTargetFingerprint(
  detail: CrawlPageDetailTargetConfig,
  fingerprint: {
    maxWidth: number
    minWidth?: number
    maxHeight: number
    minHidth?: number
  }
) {
  // Width first, then height: the order in which `random` may be called
  let width = fingerprint.maxWidth
  if (fingerprint.maxWidth !== fingerprint.minWidth) {
    width = random(fingerprint.maxWidth, fingerprint.minWidth)
  }

  let height = fingerprint.maxHeight
  if (fingerprint.maxHeight !== fingerprint.minHidth) {
    height = random(fingerprint.maxHeight, fingerprint.minHidth)
  }

  const size = { width, height }
  const existingViewport = detail.viewport

  if (existingViewport) {
    // Mutate the existing object so its other properties are preserved
    Object.assign(existingViewport, size)
  } else {
    detail.viewport = size
  }
}
380
-
381
437
/* Create Config */
382
438
/*
383
439
每个创建配置函数的返回值都是类似于进阶版配置
@@ -419,7 +475,7 @@ function createCrawlPageConfig(
419
475
}
420
476
421
477
// 装载公共配置
422
- loaderCommonConfig ( xCrawlConfig , advancedConfig , crawlPageConfig )
478
+ loaderCommonConfigToCrawlConfig ( xCrawlConfig , advancedConfig , crawlPageConfig )
423
479
424
480
// 装载单独配置
425
481
crawlPageConfig . detailTargets . forEach ( ( detail ) => {
@@ -438,9 +494,9 @@ function createCrawlPageConfig(
438
494
439
495
// 3.fingerprint
440
496
if ( fingerprint ) {
441
- loaderPageDetailTargetFingerprint ( detail , fingerprint )
497
+ loaderPageFingerprintToDetailTarget ( detail , fingerprint )
442
498
} else if ( isUndefined ( fingerprint ) && advancedConfig . fingerprint ) {
443
- loaderPageDetailTargetFingerprint ( detail , advancedConfig . fingerprint )
499
+ loaderPageFingerprintToDetailTarget ( detail , advancedConfig . fingerprint )
444
500
}
445
501
} )
446
502
@@ -477,7 +533,7 @@ function createCrawlDataConfig<T>(
477
533
crawlDataConfig . detailTargets . push ( ...detaileTargets )
478
534
}
479
535
480
- loaderCommonConfig ( xCrawlConfig , advancedConfig , crawlDataConfig )
536
+ loaderCommonConfigToCrawlConfig ( xCrawlConfig , advancedConfig , crawlDataConfig )
481
537
482
538
return crawlDataConfig as CrawlDataConfig
483
539
}
@@ -510,7 +566,7 @@ function createCrawlFileConfig(
510
566
)
511
567
}
512
568
513
- loaderCommonConfig ( xCrawlConfig , advancedConfig , crawlFileConfig )
569
+ loaderCommonConfigToCrawlConfig ( xCrawlConfig , advancedConfig , crawlFileConfig )
514
570
515
571
const haveAdvancedStoreDir = ! isUndefined ( advancedConfig ?. storeDir )
516
572
const haveAdvancedExtension = ! isUndefined ( advancedConfig ?. extension )
0 commit comments