Skip to content

Commit 4890a9b

Browse files
committed
add request mode option: async/sync
1 parent b2e9f07 commit 4890a9b

File tree

3 files changed

+156
-87
lines changed

3 files changed

+156
-87
lines changed

src/index.ts

Lines changed: 57 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import fs from 'node:fs'
22
import path from 'node:path'
33
import { JSDOM } from 'jsdom'
44

5-
import { batchRequest, request } from './request'
5+
import { batchRequest, syncBatchRequest, request } from './request'
66
import { isArray, isString, isUndefined } from './utils'
77

88
import {
@@ -13,7 +13,9 @@ import {
1313
IFetchBaseConifg,
1414
IFileInfo,
1515
IFetchCommon,
16-
IRequestResItem
16+
IRequestResItem,
17+
IRequestConfig,
18+
IIntervalTime
1719
} from './types'
1820

1921
function mergeConfig<T extends IFetchBaseConifg>(
@@ -57,6 +59,24 @@ export default class XCrawl {
5759
this.baseConfig = baseConfig
5860
}
5961

62+
private async useBatchRequestByMode(
63+
requestConifg: IRequestConfig | IRequestConfig[],
64+
intervalTime: IIntervalTime | undefined
65+
) {
66+
const requestConfigQueue = isArray(requestConifg)
67+
? requestConifg
68+
: [requestConifg]
69+
70+
let requestRes: IRequestResItem[] = []
71+
if (this.baseConfig.mode !== 'sync') {
72+
requestRes = await batchRequest(requestConfigQueue, intervalTime)
73+
} else {
74+
requestRes = await syncBatchRequest(requestConfigQueue, intervalTime)
75+
}
76+
77+
return requestRes
78+
}
79+
6080
async fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM> {
6181
const rawRequestConifg: IFetchHTMLConfig = isString(config)
6282
? { url: config }
@@ -76,58 +96,41 @@ export default class XCrawl {
7696
async fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>> {
7797
const { requestConifg, intervalTime } = mergeConfig(this.baseConfig, config)
7898

79-
const requestConfigQueue = isArray(requestConifg)
80-
? requestConifg
81-
: [requestConifg]
99+
const requestRes = await this.useBatchRequestByMode(
100+
requestConifg,
101+
intervalTime
102+
)
82103

83104
const container: IFetchCommon<T> = []
84105

85-
await batchRequest(
86-
requestConfigQueue,
87-
intervalTime,
88-
(error, requestResItem) => {
89-
if (error) return
90-
91-
const contentType = requestResItem.headers['content-type'] ?? ''
92-
const rawData = requestResItem.data
106+
requestRes.forEach((item) => {
107+
const contentType = item.headers['content-type'] ?? ''
108+
const rawData = item.data
93109

94-
const data = contentType.includes('text')
95-
? rawData.toString()
96-
: JSON.parse(rawData.toString())
110+
const data = contentType.includes('text')
111+
? rawData.toString()
112+
: JSON.parse(rawData.toString())
97113

98-
container.push({ ...requestResItem, data })
99-
}
100-
)
114+
container.push({ ...item, data })
115+
})
101116

102117
return container
103118
}
104119

105-
fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>> {
106-
return new Promise((resolve) => {
107-
const { requestConifg, intervalTime, fileConfig } = mergeConfig(
108-
this.baseConfig,
109-
config
110-
)
111-
112-
const requestConfigQueue = isArray(requestConifg)
113-
? requestConifg
114-
: [requestConifg]
120+
async fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>> {
121+
const { requestConifg, intervalTime, fileConfig } = mergeConfig(
122+
this.baseConfig,
123+
config
124+
)
125+
const requestRes = await this.useBatchRequestByMode(
126+
requestConifg,
127+
intervalTime
128+
)
115129

116-
const requestTotal = requestConfigQueue.length
130+
return new Promise((resolve) => {
117131
const container: IFetchCommon<IFileInfo> = []
118132

119-
function batchRequestResHandle(
120-
error: Error | null,
121-
requestResItem: IRequestResItem
122-
) {
123-
if (error) {
124-
if (requestResItem.id === requestTotal) {
125-
resolve(container)
126-
}
127-
128-
return
129-
}
130-
133+
requestRes.forEach((requestResItem, index) => {
131134
const { id, statusCode, headers, data } = requestResItem
132135

133136
const mimeType = headers['content-type'] ?? ''
@@ -140,25 +143,23 @@ export default class XCrawl {
140143

141144
fs.createWriteStream(filePath, 'binary').write(data, (err) => {
142145
if (err) {
143-
return console.log(`File save error at id ${id}: ${err.message}`)
146+
console.log(`File save error at id ${id}: ${err.message}`)
147+
} else {
148+
const fileInfo: IFileInfo = {
149+
fileName,
150+
mimeType,
151+
size: data.length,
152+
filePath
153+
}
154+
155+
container.push({ id, statusCode, headers, data: fileInfo })
144156
}
145157

146-
const fileInfo: IFileInfo = {
147-
fileName,
148-
mimeType,
149-
size: data.length,
150-
filePath
151-
}
152-
153-
container.push({ id, statusCode, headers, data: fileInfo })
154-
155-
if (id === requestTotal) {
158+
if (index === requestRes.length - 1) {
156159
resolve(container)
157160
}
158161
})
159-
}
160-
161-
batchRequest(requestConfigQueue, intervalTime, batchRequestResHandle)
162+
})
162163
})
163164
}
164165
}

src/request.ts

Lines changed: 98 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import {
1313
IRequestResItem
1414
} from './types'
1515

16-
export function parseParams(urlSearch: string, params?: IAnyObject): string {
16+
function parseParams(urlSearch: string, params?: IAnyObject): string {
1717
let res = urlSearch ? `${urlSearch}` : '?'
1818

1919
if (params) {
@@ -28,7 +28,7 @@ export function parseParams(urlSearch: string, params?: IAnyObject): string {
2828
return res
2929
}
3030

31-
export function parseHeaders(
31+
function parseHeaders(
3232
rawConfig: IRequestConfig,
3333
config: RequestOptions & IMapTypeEmptyObject<URL>
3434
) {
@@ -47,7 +47,7 @@ export function parseHeaders(
4747
return headers
4848
}
4949

50-
export function handleRequestConfig(
50+
function handleRequestConfig(
5151
rawConfig: IRequestConfig
5252
): RequestOptions & IMapTypeEmptyObject<URL> {
5353
const { protocol, hostname, port, pathname, search } = new Url.URL(
@@ -77,6 +77,27 @@ export function handleRequestConfig(
7777
return config
7878
}
7979

80+
async function useSleepByBatch(
81+
isHaveIntervalTime: boolean,
82+
isNumberIntervalTime: boolean,
83+
intervalTime: any,
84+
id: number
85+
) {
86+
if (isHaveIntervalTime && id > 1) {
87+
const timeout: number = isNumberIntervalTime
88+
? intervalTime
89+
: random(intervalTime.max, intervalTime.min)
90+
91+
console.log(
92+
`Request ${id} needs to sleep for ${timeout} milliseconds before sending`
93+
)
94+
95+
await sleep(timeout)
96+
} else {
97+
console.log(`Request ${id} does not need to sleep, send immediately`)
98+
}
99+
}
100+
80101
export function request(config: IRequestConfig) {
81102
return new Promise<IRequest>((resolve, reject) => {
82103
const isDataUndefine = isUndefined(config.data)
@@ -122,47 +143,93 @@ export function request(config: IRequestConfig) {
122143

123144
export async function batchRequest(
124145
requestConifgs: IRequestConfig[],
125-
intervalTime: IIntervalTime | undefined,
126-
batchRequestResHandle: (
127-
error: Error | null,
128-
requestResItem: IRequestResItem
129-
) => void
146+
intervalTime: IIntervalTime | undefined
130147
) {
131-
const total = requestConifgs.length
132-
let id = 0
133-
134148
const isHaveIntervalTime = !isUndefined(intervalTime)
135149
const isNumberIntervalTime = isNumber(intervalTime)
136150

137-
console.log(`Begin execution, total: ${total} `)
151+
console.log(`Begin execution, mode: async, total: ${requestConifgs.length} `)
138152

153+
const requestQueue: Promise<IRequestResItem | string>[] = []
154+
155+
let index = 0
139156
for (const requestConifg of requestConifgs) {
140-
id++
157+
const id = ++index
158+
159+
await useSleepByBatch(
160+
isHaveIntervalTime,
161+
isNumberIntervalTime,
162+
intervalTime,
163+
id
164+
)
165+
166+
const requestItem = request(requestConifg)
167+
.catch((error: any) => {
168+
return `Request ${id} is an error: ${error.message}`
169+
})
170+
.then((requestRes) => {
171+
if (typeof requestRes === 'string') return requestRes
141172

142-
let state = 'success'
143-
let error: Error | null = null
173+
return { id, ...requestRes }
174+
})
144175

145-
let requestRes: IRequest = {} as IRequest
146-
try {
147-
requestRes = await request(requestConifg)
148-
} catch (err: any) {
149-
error = err
150-
state = `error: ${err.message}`
176+
requestQueue.push(requestItem)
177+
}
178+
179+
console.log('All requests have been sent!')
180+
181+
const res = await Promise.all(requestQueue)
182+
183+
const success: IRequestResItem[] = []
184+
const error: string[] = []
185+
186+
// 通过类型分类
187+
res.forEach((item) => {
188+
if (typeof item === 'string') {
189+
return error.push(item)
151190
}
152191

153-
batchRequestResHandle(error, { id, ...requestRes })
192+
success.push(item)
193+
})
194+
195+
error.forEach((message) => {
196+
console.log(message)
197+
})
198+
199+
return success
200+
}
201+
202+
export async function syncBatchRequest(
203+
requestConifgs: IRequestConfig[],
204+
intervalTime: IIntervalTime | undefined
205+
) {
206+
const isHaveIntervalTime = !isUndefined(intervalTime)
207+
const isNumberIntervalTime = isNumber(intervalTime)
208+
209+
console.log(`Begin execution, mode: sync, total: ${requestConifgs.length} `)
154210

155-
if (isHaveIntervalTime && id !== total) {
156-
const timeout = isNumberIntervalTime
157-
? intervalTime
158-
: random(intervalTime.max, intervalTime.min)
211+
let id = 0
212+
const requestRes: IRequestResItem[] = []
213+
for (const requestConifg of requestConifgs) {
214+
id++
159215

160-
console.log(`The ${id} request is ${state}, sleep for ${timeout}ms`)
216+
await useSleepByBatch(
217+
isHaveIntervalTime,
218+
isNumberIntervalTime,
219+
intervalTime,
220+
id
221+
)
161222

162-
await sleep(timeout)
163-
} else {
164-
console.log(`The ${id} request is ${state}`)
165-
console.log(`All requests completed!`)
223+
try {
224+
const requestResItem = await request(requestConifg)
225+
requestRes.push({ id, ...requestResItem })
226+
console.log(`Request ${id} is an success`)
227+
} catch (error: any) {
228+
console.log(`Request ${id} is an error: ${error.message}`)
166229
}
167230
}
231+
232+
console.log('All requests are over!')
233+
234+
return requestRes
168235
}

src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ export interface IXCrawlBaseConifg {
5959
baseUrl?: string
6060
timeout?: number
6161
intervalTime?: IIntervalTime
62+
mode?: 'async' | 'sync'
6263
}
6364

6465
export interface IFetchBaseConifg {

0 commit comments

Comments
 (0)