Skip to content

Commit ea372e1

Browse files
committed
feat: make crawlFile API configuration more flexible
1 parent 777a9af commit ea372e1

File tree

6 files changed

+83
-29
lines changed

6 files changed

+83
-29
lines changed

src/api.ts

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { quickSort } from './sort'
99
import {
1010
isArray,
1111
isObject,
12+
isString,
1213
isUndefined,
1314
log,
1415
logError,
@@ -653,21 +654,40 @@ function createCrawlFileConfig(
653654
crawlFileConfig
654655
)
655656

656-
const haveAdvancedStoreDir = !isUndefined(
657-
advancedDetailTargetsConfig?.storeDir
658-
)
659-
const haveAdvancedExtension = !isUndefined(
660-
advancedDetailTargetsConfig?.extension
657+
const advancedStoreDirInfo = {
658+
exist: !isUndefined(advancedDetailTargetsConfig?.storeDirs),
659+
type: isString(advancedDetailTargetsConfig?.storeDirs) ? 0 : 1
660+
}
661+
662+
const AdvancedExtension = {
663+
exist: !isUndefined(advancedDetailTargetsConfig?.extensions),
664+
type: isString(advancedDetailTargetsConfig?.extensions) ? 0 : 1
665+
}
666+
const haveAdvancedFileNames = !isUndefined(
667+
advancedDetailTargetsConfig?.fileNames
661668
)
662-
crawlFileConfig.detailTargets.forEach((detail) => {
669+
crawlFileConfig.detailTargets.forEach((detail, i) => {
663670
// 1.storeDir
664-
if (isUndefined(detail.storeDir) && haveAdvancedStoreDir) {
665-
detail.storeDir = advancedDetailTargetsConfig!.storeDir
671+
if (isUndefined(detail.storeDir) && advancedStoreDirInfo.exist) {
672+
detail.storeDir =
673+
advancedStoreDirInfo.type === 0
674+
? (advancedDetailTargetsConfig!.storeDirs as string)
675+
: (advancedDetailTargetsConfig!.storeDirs as (string | null)[])[i]
666676
}
667677

668678
// 2.extension
669-
if (isUndefined(detail.extension) && haveAdvancedExtension) {
670-
detail.extension = advancedDetailTargetsConfig!.extension
679+
if (isUndefined(detail.extension) && AdvancedExtension.exist) {
680+
detail.extension =
681+
AdvancedExtension.type === 0 // branch on the shape of `extensions` itself (0 = single string, 1 = per-target array); `storeDirs` may be absent or have a different shape
682+
? (advancedDetailTargetsConfig!.extensions as string)
683+
: (advancedDetailTargetsConfig!.extensions as (string | null)[])[i]
684+
}
685+
686+
// 3.fileName
687+
if (isUndefined(detail.fileName) && haveAdvancedFileNames) {
688+
detail.fileName = (
689+
advancedDetailTargetsConfig!.fileNames as (string | null)[]
690+
)[i]
671691
}
672692
})
673693

src/types/api.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ export interface CrawlFileDetailTargetConfig extends CrawlCommonConfig {
110110
headers?: AnyObject | null
111111
priority?: number
112112
storeDir?: string | null
113-
fileName?: string
113+
fileName?: string | null
114114
extension?: string | null
115115
fingerprint?: DetailTargetFingerprintCommon | null
116116
}
@@ -149,10 +149,11 @@ export interface CrawlFileAdvancedConfig extends CrawlCommonConfig {
149149
targets: (string | CrawlFileDetailTargetConfig)[]
150150
intervalTime?: IntervalTime
151151
fingerprints?: DetailTargetFingerprintCommon[]
152+
storeDirs?: string | (string | null)[]
153+
extensions?: string | (string | null)[]
154+
fileNames?: (string | null)[]
152155

153156
headers?: AnyObject
154-
storeDir?: string
155-
extension?: string
156157

157158
onCrawlItemComplete?: (crawlFileSingleResult: CrawlFileSingleResult) => void
158159
onBeforeSaveItemFile?: (info: {

test/environment/api/crawlFile.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@ const urls: string[] = [
2222
'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area/4403.jpg'
2323
]
2424

25-
const storeDir = path.resolve(__dirname, './upload')
25+
const storeDirs = path.resolve(__dirname, './upload')
2626

2727
async function testCrawlFile() {
2828
const testXCrawl = xCrawl({ proxy: { urls: ['http://localhost:14892'] } })
2929

3030
const res = await testXCrawl.crawlFile({
3131
targets: urls,
32-
storeDir
32+
storeDirs
3333
})
3434

3535
return res.reduce(

test/environment/written/crawlFile.test.ts

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@ const urls: string[] = [
2222
'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area/4403.jpg'
2323
]
2424

25-
const storeDir = path.resolve(__dirname, './upload')
25+
const storeDirs = path.resolve(__dirname, './upload')
2626

2727
/* 1.Written */
2828
// 1.1.written CrawlFileDetailConfig
2929
async function writtenCrawlFileDetailConfig() {
3030
const testXCrawl = xCrawl({ proxy: { urls: ['http://localhost:14892'] } })
3131

32-
const res = await testXCrawl.crawlFile({ url: urls[0], storeDir })
32+
const res = await testXCrawl.crawlFile({ url: urls[0], storeDir: storeDirs })
3333

3434
return res.isSuccess && res.data?.data.isSuccess
3535
}
@@ -38,7 +38,9 @@ async function writtenCrawlFileDetailConfig() {
3838
async function writtenCrawlFileDetailConfigArr() {
3939
const testXCrawl = xCrawl({ proxy: { urls: ['http://localhost:14892'] } })
4040

41-
const res = await testXCrawl.crawlFile(urls.map((url) => ({ url, storeDir })))
41+
const res = await testXCrawl.crawlFile(
42+
urls.map((url) => ({ url, storeDir: storeDirs }))
43+
)
4244

4345
return res.reduce(
4446
(prev, item) => prev && item.isSuccess && !!item.data?.data.isSuccess,
@@ -52,7 +54,7 @@ async function writtenCrawlFileAdvancedConfig() {
5254

5355
const res = await testXCrawl.crawlFile({
5456
targets: urls,
55-
storeDir
57+
storeDirs
5658
})
5759

5860
return res.reduce(
@@ -75,7 +77,7 @@ async function loaderBaseConfig() {
7577

7678
const res = await testXCrawl.crawlFile({
7779
targets: ['/4401.jpg', '/4403.jpg'],
78-
storeDir
80+
storeDirs
7981
})
8082

8183
return res.reduce((prev, item) => prev && item.isSuccess, true)
@@ -92,7 +94,7 @@ async function loaderAdvancedConfig() {
9294
targets: ['/4401.jpg', '/4403.jpg'],
9395
proxy: { urls: ['http://localhost:14892'] },
9496
timeout: 10000,
95-
storeDir,
97+
storeDirs,
9698
intervalTime: { max: 1000 },
9799
maxRetry: 0
98100
})
@@ -114,8 +116,8 @@ async function storeConfig() {
114116
{ url: '/4401.jpg', fileName: '4401' },
115117
{ url: '/4403.jpg', fileName: '4403' }
116118
],
117-
storeDir: path.resolve(__dirname, './upload'),
118-
extension: '.jpg',
119+
storeDirs: path.resolve(__dirname, './upload'),
120+
extensions: '.jpg',
119121
async onBeforeSaveItemFile(info) {
120122
record.push(info.fileName)
121123
return info.data

test/start/index.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/start/index.ts

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,40 @@
11
import xCrawl from 'x-crawl'
2-
import sharp from 'sharp'
3-
import path from 'path'
42

5-
const testXCrawl = xCrawl({ maxRetry: 2 })
3+
const testXCrawl = xCrawl({
4+
baseUrl: 'http://8.210.98.225:9001/api/room/193581217/room_picture'
5+
})
6+
7+
const names = [
8+
'1672468265289r193581217',
9+
'1672468265300r193581217',
10+
'1672468265284r193581217',
11+
'1672468265279r193581217',
12+
'1672468265298r193581217',
13+
'1672468265293r193581217'
14+
]
15+
16+
const targets: string[] = []
17+
const fileNames: (string | null)[] = []
18+
const storeDirs: string[] = []
19+
const extensions: (string | null)[] = []
20+
21+
names.forEach((name, i) => {
22+
targets.push(`/${name}.jpg`)
23+
24+
if (i % 2) {
25+
fileNames.push(name)
26+
storeDirs.push(`./upload/${name}`)
27+
extensions.push('.jpg')
28+
} else {
29+
fileNames.push(null)
30+
storeDirs.push('./upload')
31+
extensions.push(null)
32+
}
33+
})
634

7-
testXCrawl.crawlData(['https://', 'https://', 'https://']).then((res) => {
8-
console.log(res)
35+
testXCrawl.crawlFile({
36+
targets,
37+
storeDirs,
38+
fileNames,
39+
extensions
940
})

0 commit comments

Comments
 (0)