@@ -14,7 +14,8 @@ import {
14
14
logError ,
15
15
logSuccess ,
16
16
logWarn ,
17
- mkdirDirSync
17
+ mkdirDirSync ,
18
+ random
18
19
} from './utils'
19
20
20
21
import {
@@ -216,7 +217,8 @@ function loaderCommonConfig(
216
217
// 1.detailTargets
217
218
crawlConfig . detailTargets . forEach ( ( detail ) => {
218
219
// detail > advanced > app
219
- const { url, timeout, proxy, maxRetry, priority, headers } = detail
220
+ const { url, timeout, proxy, maxRetry, priority, headers, fingerprint } =
221
+ detail
220
222
221
223
// 1.1.baseUrl
222
224
if ( ! isUndefined ( xCrawlConfig . baseUrl ) ) {
@@ -259,6 +261,75 @@ function loaderCommonConfig(
259
261
if ( isUndefined ( headers ) ) {
260
262
detail . headers = advancedConfig . headers
261
263
}
264
+
265
+ // 1.7.fingerprint(公共部分)
266
+ if ( fingerprint ) {
267
+ const { userAgent, ua, platform, mobile, acceptLanguage } = fingerprint
268
+ let headers = detail . headers
269
+
270
+ if ( ! headers ) {
271
+ detail . headers = headers = { }
272
+ }
273
+
274
+ // 1.user-agent
275
+ if ( userAgent ) {
276
+ headers [ 'user-agent' ] = userAgent
277
+ }
278
+
279
+ // 2.sec-ch-ua
280
+ if ( ua ) {
281
+ headers [ 'sec-ch-ua' ] = ua
282
+ }
283
+
284
+ // 3.sec-ch-platform
285
+ if ( platform ) {
286
+ headers [ 'sec-ch-platform' ] = platform
287
+ }
288
+
289
+ // 4.sec-ch-mobile
290
+ if ( mobile ) {
291
+ headers [ 'sec-ch-mobile' ] = mobile
292
+ }
293
+
294
+ // 5.accept-language
295
+ if ( acceptLanguage ) {
296
+ headers [ 'accept-language' ] = acceptLanguage
297
+ }
298
+ } else if ( isUndefined ( fingerprint ) && advancedConfig . fingerprint ) {
299
+ const { userAgents, uas, platforms, mobiles, acceptLanguages } =
300
+ advancedConfig . fingerprint
301
+ let headers = detail . headers
302
+
303
+ if ( ! headers ) {
304
+ detail . headers = headers = { }
305
+ }
306
+
307
+ // 1.user-agent
308
+ if ( userAgents ) {
309
+ headers [ 'user-agent' ] = userAgents [ random ( userAgents . length ) ]
310
+ }
311
+
312
+ // 2.sec-ch-ua
313
+ if ( uas ) {
314
+ headers [ 'sec-ch-ua' ] = uas [ random ( uas . length ) ]
315
+ }
316
+
317
+ // 3.sec-ch-platform
318
+ if ( platforms ) {
319
+ headers [ 'sec-ch-platform' ] = platforms [ random ( platforms . length ) ]
320
+ }
321
+
322
+ // 4.sec-ch-mobile
323
+ if ( mobiles ) {
324
+ headers [ 'sec-ch-mobile' ] = mobiles [ random ( mobiles . length ) ]
325
+ }
326
+
327
+ // 5.accept-language
328
+ if ( acceptLanguages ) {
329
+ headers [ 'accept-language' ] =
330
+ acceptLanguages [ random ( acceptLanguages . length ) ]
331
+ }
332
+ }
262
333
} )
263
334
264
335
// 2.intervalTime
@@ -274,6 +345,30 @@ function loaderCommonConfig(
274
345
crawlConfig . onCrawlItemComplete = advancedConfig . onCrawlItemComplete
275
346
}
276
347
348
/**
 * Applies a viewport-size fingerprint to a single page crawl target.
 *
 * A width and a height are picked from the given bounds and stored on
 * `detail.viewport`. When the upper and lower bound of a dimension are equal,
 * that value is used directly and `random` is not called for it.
 *
 * @param detail - Page target whose `viewport` is set or replaced.
 * @param fingerprint - Size bounds. `minHidth` is the lower bound for the
 *   height; the spelling is part of the declared parameter shape and is kept
 *   unchanged so existing configuration objects still match.
 */
function loaderPageDetailFingerprint(
  detail: CrawlPageDetailConfig,
  fingerprint: {
    maxWidth: number
    minWidth?: number
    maxHeight: number
    minHidth?: number
  }
) {
  const { maxWidth, minWidth, maxHeight, minHidth } = fingerprint

  // 1.width / height
  const width = maxWidth === minWidth ? maxWidth : random(maxWidth, minWidth)
  const height =
    maxHeight === minHidth ? maxHeight : random(maxHeight, minHidth)

  // 2.viewport
  // Assign a fresh object rather than mutating the current one in place:
  // createCrawlPageConfig hands the same advancedConfig.viewport object to
  // every target that has no viewport of its own, so an in-place write would
  // overwrite that shared object and give all such targets the same size.
  // Spreading keeps any other fields already present on the viewport.
  detail.viewport = { ...detail.viewport, width, height }
}
371
+
277
372
/* Create Config */
278
373
/*
279
374
每个创建配置函数的返回值都是类似于对应的进阶版(类似 CrawlAdvancedConfig)配置
@@ -318,21 +413,26 @@ function createCrawlPageConfig(
318
413
loaderCommonConfig ( xCrawlConfig , advancedConfig , crawlPageConfig )
319
414
320
415
// 装载单独配置
321
- const haveAdvancedCookies = ! isUndefined ( advancedConfig . cookies )
322
- const haveAdvancedViewport = ! isUndefined ( advancedConfig . viewport )
323
416
crawlPageConfig . detailTargets . forEach ( ( detail ) => {
324
417
// detail > advanced > xCrawl
325
- const { cookies, viewport } = detail
418
+ const { cookies, viewport, fingerprint } = detail
326
419
327
420
// 1.cookies
328
- if ( isUndefined ( cookies ) && haveAdvancedCookies ) {
421
+ if ( isUndefined ( cookies ) && advancedConfig . cookies ) {
329
422
detail . cookies = advancedConfig . cookies
330
423
}
331
424
332
425
// 2.viewport
333
- if ( isUndefined ( viewport ) && haveAdvancedViewport ) {
426
+ if ( isUndefined ( viewport ) && advancedConfig . viewport ) {
334
427
detail . viewport = advancedConfig . viewport
335
428
}
429
+
430
+ // 3.fingerprint
431
+ if ( fingerprint ) {
432
+ loaderPageDetailFingerprint ( detail , fingerprint )
433
+ } else if ( isUndefined ( fingerprint ) && advancedConfig . fingerprint ) {
434
+ loaderPageDetailFingerprint ( detail , advancedConfig . fingerprint )
435
+ }
336
436
} )
337
437
338
438
return crawlPageConfig as CrawlPageConfig
@@ -657,6 +757,8 @@ export function createCrawlPage(xCrawlConfig: LoaderXCrawlConfig) {
657
757
const { detailTargets, intervalTime, onCrawlItemComplete } =
658
758
createCrawlPageConfig ( xCrawlConfig , config )
659
759
760
+ log ( detailTargets )
761
+
660
762
const extraConfig : ExtraPageConfig = {
661
763
errorPageMap : new Map ( ) ,
662
764
browser : browser ! ,
0 commit comments