Skip to content

Commit b57b1e2

Browse files
committed
Rename the onBeforeSaveFile lifecycle function to onBeforeSaveItemFile / Miscellaneous changes
1 parent 1dacd25 commit b57b1e2

File tree

4 files changed

+95
-87
lines changed

4 files changed

+95
-87
lines changed

src/api.ts

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ interface ExtraFileConfig extends ExtraCommonConfig {
8484
onCrawlItemComplete:
8585
| ((crawlFileSingleRes: CrawlFileSingleRes) => void)
8686
| undefined
87-
onBeforeSaveFile:
87+
onBeforeSaveItemFile:
8888
| ((info: {
8989
id: number
9090
fileName: string
@@ -120,7 +120,7 @@ interface CrawlDataConfigOriginal {
120120
interface CrawlFileConfigOriginal {
121121
detailTargets: CrawlFileDetailTargetConfig[]
122122
intervalTime: IntervalTime | undefined
123-
onBeforeSaveFile:
123+
onBeforeSaveItemFile:
124124
| ((info: {
125125
id: number
126126
fileName: string
@@ -195,27 +195,29 @@ function parsePageCookies(
195195
return cookiesArr
196196
}
197197

198-
function transformToDetailTargets(
198+
function transformTargetToDetailTargets(
199199
config:
200200
| string
201201
| CrawlPageDetailTargetConfig
202202
| (string | CrawlPageDetailTargetConfig)[]
203203
): CrawlPageDetailTargetConfig[]
204-
function transformToDetailTargets(
204+
function transformTargetToDetailTargets(
205205
config:
206206
| string
207207
| CrawlDataDetailTargetConfig
208208
| (string | CrawlDataDetailTargetConfig)[]
209209
): CrawlDataDetailTargetConfig[]
210-
function transformToDetailTargets(
210+
function transformTargetToDetailTargets(
211211
config: (string | CrawlFileDetailTargetConfig)[]
212212
): CrawlFileDetailTargetConfig[]
213-
function transformToDetailTargets(config: any) {
213+
function transformTargetToDetailTargets(config: any) {
214214
return isArray(config)
215215
? config.map((item) => (isObject(item) ? item : { url: item }))
216216
: [isObject(config) ? config : { url: config }]
217217
}
218218

219+
/* Loader config */
220+
219221
function loaderCommonFingerprintToDetailTarget(
220222
detail:
221223
| CrawlPageDetailTargetConfig
@@ -434,7 +436,7 @@ function loaderCommonConfigToCrawlConfig(
434436
crawlConfig.onCrawlItemComplete = advancedConfig.onCrawlItemComplete
435437
}
436438

437-
/* Create Config */
439+
/* Create config */
438440
/*
439441
每个创建配置函数的返回值都是类似于进阶版配置
440442
不同点:
@@ -461,10 +463,12 @@ function createCrawlPageConfig(
461463
const { targets } = originalConfig as CrawlPageAdvancedConfig
462464
advancedConfig = originalConfig as CrawlPageAdvancedConfig
463465

464-
crawlPageConfig.detailTargets.push(...transformToDetailTargets(targets))
466+
crawlPageConfig.detailTargets.push(
467+
...transformTargetToDetailTargets(targets)
468+
)
465469
} else {
466470
// string | CrawlPageDetailTargetConfig | (string | CrawlPageDetailTargetConfig)[] 处理
467-
const detaileTargets = transformToDetailTargets(
471+
const detaileTargets = transformTargetToDetailTargets(
468472
originalConfig as
469473
| string
470474
| CrawlPageDetailTargetConfig
@@ -520,10 +524,12 @@ function createCrawlDataConfig<T>(
520524
const { targets } = originalConfig as CrawlDataAdvancedConfig<T>
521525
advancedConfig = originalConfig as CrawlDataAdvancedConfig<T>
522526

523-
crawlDataConfig.detailTargets.push(...transformToDetailTargets(targets))
527+
crawlDataConfig.detailTargets.push(
528+
...transformTargetToDetailTargets(targets)
529+
)
524530
} else {
525531
// string | CrawlDataDetailTargetConfig | (string | CrawlDataDetailTargetConfig)[] 处理
526-
const detaileTargets = transformToDetailTargets(
532+
const detaileTargets = transformTargetToDetailTargets(
527533
originalConfig as
528534
| string
529535
| CrawlDataDetailTargetConfig
@@ -545,7 +551,7 @@ function createCrawlFileConfig(
545551
const crawlFileConfig: CrawlFileConfigOriginal = {
546552
detailTargets: [],
547553
intervalTime: undefined,
548-
onBeforeSaveFile: undefined,
554+
onBeforeSaveItemFile: undefined,
549555
onCrawlItemComplete: undefined
550556
}
551557

@@ -556,7 +562,9 @@ function createCrawlFileConfig(
556562
const { targets } = originalConfig as CrawlFileAdvancedConfig
557563

558564
advancedConfig = originalConfig as CrawlFileAdvancedConfig
559-
crawlFileConfig.detailTargets.push(...transformToDetailTargets(targets))
565+
crawlFileConfig.detailTargets.push(
566+
...transformTargetToDetailTargets(targets)
567+
)
560568
} else {
561569
// CrawlFileDetailTargetConfig | CrawlFileDetailTargetConfig[] 处理
562570
crawlFileConfig.detailTargets.push(
@@ -582,7 +590,7 @@ function createCrawlFileConfig(
582590
}
583591
})
584592

585-
crawlFileConfig.onBeforeSaveFile = advancedConfig.onBeforeSaveFile
593+
crawlFileConfig.onBeforeSaveItemFile = advancedConfig.onBeforeSaveItemFile
586594

587595
return crawlFileConfig as CrawlFileConfig
588596
}
@@ -697,7 +705,7 @@ function fileSingleResultHandle(
697705
saveFilePendingQueue,
698706

699707
onCrawlItemComplete,
700-
onBeforeSaveFile
708+
onBeforeSaveItemFile
701709
} = extraConfig
702710

703711
const crawlFileSingleRes: AnyObject = detaileInfo
@@ -721,8 +729,8 @@ function fileSingleResultHandle(
721729
// 在保存前的回调
722730
const data = detailTargetRes.data
723731
let dataPromise = Promise.resolve(data)
724-
if (onBeforeSaveFile) {
725-
dataPromise = onBeforeSaveFile({
732+
if (onBeforeSaveItemFile) {
733+
dataPromise = onBeforeSaveItemFile({
726734
id,
727735
fileName,
728736
filePath,
@@ -959,7 +967,7 @@ export function createCrawlFile(xCrawlConfig: LoaderXCrawlConfig) {
959967
const {
960968
detailTargets,
961969
intervalTime,
962-
onBeforeSaveFile,
970+
onBeforeSaveItemFile,
963971
onCrawlItemComplete
964972
} = createCrawlFileConfig(xCrawlConfig, config)
965973

@@ -969,7 +977,7 @@ export function createCrawlFile(xCrawlConfig: LoaderXCrawlConfig) {
969977

970978
intervalTime,
971979
onCrawlItemComplete,
972-
onBeforeSaveFile
980+
onBeforeSaveItemFile
973981
}
974982

975983
const crawlResArr = (await controller(

src/types/api.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import { AnyObject } from './common'
55

66
/* API Config */
77

8-
// API Config Other
8+
// API crawl config
9+
10+
// API crawl config other
911
export type IntervalTime = number | { max: number; min?: number }
1012

1113
export type Method =
@@ -47,14 +49,6 @@ export type Platform =
4749

4850
export type Mobile = '?0' | '?1'
4951

50-
// API crawl config
51-
// Common
52-
export interface CrawlCommonConfig {
53-
timeout?: number
54-
proxy?: string
55-
maxRetry?: number
56-
}
57-
5852
export interface DetailTargetFingerprintCommon {
5953
userAgent?: string
6054
ua?: string
@@ -73,7 +67,13 @@ export interface AdvancedFingerprintCommon {
7367
acceptLanguages?: string[]
7468
}
7569

76-
// 1.Detail
70+
export interface CrawlCommonConfig {
71+
timeout?: number
72+
proxy?: string
73+
maxRetry?: number
74+
}
75+
76+
// 1.Detail target
7777
export interface CrawlPageDetailTargetConfig extends CrawlCommonConfig {
7878
url: string
7979
headers?: AnyObject | null
@@ -148,7 +148,7 @@ export interface CrawlFileAdvancedConfig extends CrawlCommonConfig {
148148
extension?: string
149149

150150
onCrawlItemComplete?: (crawlFileSingleRes: CrawlFileSingleRes) => void
151-
onBeforeSaveFile?: (info: {
151+
onBeforeSaveItemFile?: (info: {
152152
id: number
153153
fileName: string
154154
filePath: string

test/environment/crawlFile.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ async function storeConfig() {
116116
],
117117
storeDir: path.resolve(__dirname, './upload'),
118118
extension: '.jpg',
119-
async onBeforeSaveFile(info) {
119+
async onBeforeSaveItemFile(info) {
120120
record.push(info.fileName)
121121
return info.data
122122
}

test/start/index.ts

Lines changed: 56 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -9,64 +9,64 @@ const testXCrawl = xCrawl({
99
intervalTime: { max: 5000, min: 3000 }
1010
})
1111

12-
testXCrawl
13-
.crawlFile({
14-
targets: ['/4401.jpg', '/4403.jpg', '/4404.jpg', '/4406.jpg', '/4407.jpg'],
15-
proxy: 'http://localhost:14892',
16-
headers: {
17-
test: 'test'
18-
},
19-
storeDir: path.resolve(__dirname, './upload'),
20-
onBeforeSaveFile(info) {
21-
return sharp(info.data).resize(200).toBuffer()
22-
},
23-
onCrawlItemComplete(crawlFileSingleRes) {
24-
// console.log(111, crawlFileSingleRes)
25-
}
26-
})
27-
.then(async (res) => {
28-
// console.log(res)
29-
30-
res.forEach((item) => {
31-
console.log(item.data?.data.isSuccess)
32-
})
33-
})
34-
3512
// testXCrawl
36-
// .crawlPage({
37-
// targets: [
38-
// 'https://github.com/coder-hxl',
39-
// {
40-
// url: 'https://github.com/coder-hxl/x-crawl',
41-
// fingerprint: null
42-
// },
43-
// {
44-
// url: 'https://github.com/coder-hxl/x-crawl/stargazers',
45-
// fingerprint: {
46-
// maxWidth: 1980,
47-
// minWidth: 1980,
48-
// maxHeight: 1080,
49-
// minHidth: 1080,
50-
// platform: 'Android'
51-
// }
52-
// }
53-
// ],
54-
// fingerprint: {
55-
// maxWidth: 1980,
56-
// maxHeight: 1080,
57-
// userAgents: [
58-
// 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
59-
// 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
60-
// 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
61-
// 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'
62-
// ],
63-
// platforms: ['Chromium OS', 'iOS', 'Linux', 'macOS', 'Windows']
13+
// .crawlFile({
14+
// targets: ['/4401.jpg', '/4403.jpg', '/4404.jpg', '/4406.jpg', '/4407.jpg'],
15+
// proxy: 'http://localhost:14892',
16+
// headers: {
17+
// test: 'test'
18+
// },
19+
// storeDir: path.resolve(__dirname, './upload'),
20+
// onBeforeSaveItemFile(info) {
21+
// return sharp(info.data).resize(200).toBuffer()
22+
// },
23+
// onCrawlItemComplete(crawlFileSingleRes) {
24+
// // console.log(111, crawlFileSingleRes)
6425
// }
6526
// })
66-
// .then((res) => {
67-
// res.forEach((item, i) => {
68-
// item.data.page.screenshot({ path: `./img${i}.jpg` }).then(() => {
69-
// console.log(i, 'success')
70-
// })
27+
// .then(async (res) => {
28+
// // console.log(res)
29+
30+
// res.forEach((item) => {
31+
// console.log(item.data?.data.isSuccess)
7132
// })
7233
// })
34+
35+
testXCrawl
36+
.crawlPage({
37+
targets: [
38+
'https://github.com/coder-hxl',
39+
{
40+
url: 'https://github.com/coder-hxl/x-crawl',
41+
fingerprint: null
42+
},
43+
{
44+
url: 'https://github.com/coder-hxl/x-crawl/stargazers',
45+
fingerprint: {
46+
maxWidth: 1980,
47+
minWidth: 1980,
48+
maxHeight: 1080,
49+
minHidth: 1080,
50+
platform: 'Android'
51+
}
52+
}
53+
],
54+
fingerprint: {
55+
maxWidth: 1980,
56+
maxHeight: 1080,
57+
userAgents: [
58+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
59+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
60+
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
61+
'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'
62+
],
63+
platforms: ['Chromium OS', 'iOS', 'Linux', 'macOS', 'Windows']
64+
}
65+
})
66+
.then((res) => {
67+
res.forEach((item, i) => {
68+
item.data.page.screenshot({ path: `./img${i}.jpg` }).then(() => {
69+
console.log(i, 'success')
70+
})
71+
})
72+
})

0 commit comments

Comments
 (0)