Skip to content

Commit 2f891e1

Browse files
committed
chore: move code location
1 parent db760a1 commit 2f891e1

File tree

6 files changed

+84
-114
lines changed

6 files changed

+84
-114
lines changed

src/api.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ import puppeteer, { Browser, HTTPResponse, Page, Protocol } from 'puppeteer'
55

66
import { Device, controller, isCrawlStatusInHttpStatus } from './controller'
77
import { Request, request } from './request'
8-
import { quickSort } from './sort'
98
import {
9+
mergeSort,
1010
isArray,
1111
isObject,
1212
isString,
@@ -1350,7 +1350,7 @@ export function createCrawlFile(xCrawlInstanceConfig: XCrawlInstanceConfig) {
13501350

13511351
if (logConfig.result) {
13521352
// 打印保存错误
1353-
quickSort(saveFileErrorArr).forEach((item) =>
1353+
mergeSort(saveFileErrorArr).forEach((item) =>
13541354
log(`${infoConfig.serialNumber} | ${logError(item.message)}`)
13551355
)
13561356

src/controller.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import { asyncBatchCrawl, syncBatchCrawl } from './batchCrawl'
2-
import { priorityQueueMergeSort } from './sort'
32

43
import {
54
InfoCommonConfig,
@@ -11,6 +10,7 @@ import {
1110
} from './api'
1211

1312
import {
13+
mergeSort,
1414
isObject,
1515
isUndefined,
1616
log,
@@ -96,9 +96,9 @@ export async function controller<
9696
(item) => item.priority === detailTargets[0].priority
9797
)
9898
const detailTargetConfigs = isPriorityCrawl
99-
? priorityQueueMergeSort(
99+
? mergeSort(
100100
detailTargets.map((item) => ({ ...item, valueOf: () => item.priority }))
101-
)
101+
).reverse()
102102
: detailTargets
103103

104104
// 生成装置
@@ -185,8 +185,9 @@ export async function controller<
185185
)!.state = false
186186

187187
// 寻找新代理 URL
188-
const newProxyUrl = proxyDetails.find((detaile) => detaile.state)
189-
?.url
188+
const newProxyUrl = proxyDetails.find(
189+
(detaile) => detaile.state
190+
)?.url
190191

191192
// 使用新代理 URL
192193
if (!isUndefined(newProxyUrl)) {

src/sort.ts

Lines changed: 0 additions & 77 deletions
This file was deleted.

src/types/api.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ export interface CrawlDataDetailTargetConfig extends CrawlCommonConfig {
115115
export interface CrawlFileDetailTargetConfig extends CrawlCommonConfig {
116116
url: string
117117
headers?: AnyObject | null
118+
priority?: number
118119
storeDir?: string | null
119120
fileName?: string | null
120121
extension?: string | null

src/utils.ts

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,6 @@
11
import chalk from 'chalk'
22

3-
export function sleep(timeout: number) {
4-
return new Promise((resolve) => setTimeout(resolve, timeout))
5-
}
6-
7-
export function random(max: number, min = 0) {
8-
let result = Math.floor(Math.random() * max)
9-
10-
while (result < min) {
11-
result = Math.floor(Math.random() * max)
12-
}
13-
14-
return result
15-
}
16-
3+
// Log
174
export const log = console.log
185
export const logStart = chalk.blueBright
196
export const logStatistics = chalk.whiteBright
@@ -40,9 +27,52 @@ export function isBoolean(value: any): value is string {
4027
}
4128

4229
/**
 * Type guard for non-null, non-array objects.
 *
 * The explicit `value !== null` comparison guarantees a strict boolean
 * result; a bare truthiness check (`&& value &&`) would make the whole
 * expression evaluate to `null` when `value` is `null`.
 *
 * @param value - Any value to test.
 * @returns `true` when `typeof value === 'object'`, the value is not `null`
 *   and it is not an array; `false` otherwise.
 */
export function isObject(value: any): value is object {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}
4532

4633
/**
 * Type guard that reports whether the given value is an array.
 *
 * @param value - Any value to test.
 * @returns The result of `Array.isArray(value)`.
 */
export function isArray(value: any): value is any[] {
  const valueIsArray: boolean = Array.isArray(value)

  return valueIsArray
}
36+
37+
/**
 * Returns a promise that resolves after a `setTimeout` of `timeout`
 * milliseconds fires.
 *
 * The promise carries no value, so it is typed as `Promise<void>` instead of
 * the inferred `Promise<unknown>`.
 *
 * @param timeout - Delay passed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves (with `undefined`) once the timer fires.
 */
export function sleep(timeout: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, timeout))
}
40+
41+
/**
 * Picks a random integer `r` with `min <= r`, drawn as
 * `Math.floor(Math.random() * max)` and re-drawn until it is not below `min`.
 *
 * For a non-negative `max` the draw can only produce integers from `0` up to
 * `Math.ceil(max) - 1` (just `0` when `max` is `0`). When `min` is above that
 * bound, re-drawing can never succeed, so `min` is returned directly instead
 * of looping forever (e.g. `random(5, 5)` returns `5`).
 *
 * @param max - Exclusive upper bound of the draw.
 * @param min - Inclusive lower bound of the result; defaults to `0`.
 * @returns A random integer in `[min, max)`, or `min` itself when that range
 *   contains no reachable integer.
 */
export function random(max: number, min = 0): number {
  // Guard the unsatisfiable case; without it the loop below never terminates.
  if (max >= 0 && min > Math.max(Math.ceil(max) - 1, 0)) {
    return min
  }

  let result = Math.floor(Math.random() * max)

  // Rejection sampling: discard draws that fall below the lower bound.
  while (result < min) {
    result = Math.floor(Math.random() * max)
  }

  return result
}
50+
51+
/**
 * Sorts an array in ascending order using a top-down merge sort.
 *
 * Elements are compared with `<=`, so plain numbers/strings work directly and
 * objects are ordered by whatever primitive the comparison coerces them to
 * (for example a custom `valueOf()`). Taking from the left half on ties keeps
 * the sort stable.
 *
 * The input array is never modified. Arrays of length 0 or 1 are returned
 * as-is (same reference); longer arrays yield a new array.
 *
 * @param arr - The array to sort.
 * @returns The elements of `arr` in ascending order.
 */
export function mergeSort<T extends any[]>(arr: T): T {
  if (arr.length <= 1) return arr

  const mid = Math.floor(arr.length / 2)
  const left = mergeSort(arr.slice(0, mid))
  const right = mergeSort(arr.slice(mid))

  const merged: any[] = []
  let i = 0
  let j = 0

  // Take the smaller head each time; `<=` prefers the left half on ties.
  while (i < left.length && j < right.length) {
    merged.push(left[i] <= right[j] ? left[i++] : right[j++])
  }

  // Append the leftovers element by element. Spreading a large remainder
  // into push() can exceed the engine's argument limit, and splice() would
  // needlessly mutate the intermediate half.
  while (i < left.length) merged.push(left[i++])
  while (j < right.length) merged.push(right[j++])

  return merged as T
}

test/start/index.ts

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,34 @@
1+
import path from 'node:path'
12
import xCrawl from 'x-crawl'
23

3-
const testXCrawl = xCrawl()
4-
5-
testXCrawl.crawlHTML([
6-
'http://localhost:8888/html',
7-
'http://localhost:8888/html',
8-
'http://localhost:8888/html'
9-
])
4+
const pathResolve = (dirPath: string) => path.resolve(__dirname, dirPath)
105

11-
testXCrawl.crawlHTML('http://localhost:8888/html')
6+
const testXCrawl = xCrawl()
127

13-
testXCrawl.crawlFile({
14-
targets: [
15-
'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area/4401.jpg',
16-
'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area/4403.jpg'
17-
],
18-
proxy: { urls: ['http://localhost:14892'] }
19-
})
8+
testXCrawl
9+
.crawlFile({
10+
targets: [
11+
{
12+
url: 'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area/4408.jpg',
13+
fileName: '4408',
14+
priority: 1
15+
},
16+
{
17+
url: 'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area/4401.jpg',
18+
fileName: '4401',
19+
priority: 3
20+
},
21+
{
22+
url: 'https://raw.githubusercontent.com/coder-hxl/airbnb-upload/master/area/4406.jpg',
23+
fileName: '4406',
24+
priority: 2
25+
}
26+
],
27+
proxy: { urls: ['http://localhost:14892'] },
28+
storeDirs: pathResolve('./upload')
29+
})
30+
.then((res) => {
31+
res.forEach((item) => {
32+
console.log(item.id, item.data?.data.fileName)
33+
})
34+
})

0 commit comments

Comments
 (0)