Skip to content

Commit f9ff12f

Browse files
committed
迁移部分函数
1 parent 5fe4966 commit f9ff12f

File tree

3 files changed

+113
-123
lines changed

3 files changed

+113
-123
lines changed

src/index.ts

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,52 @@ import path from 'node:path'
33
import { JSDOM } from 'jsdom'
44

55
import { batchRequest, request } from './request'
6-
import { isArray, mergeConfig } from './utils'
6+
import { isArray, isUndefined } from './utils'
77

88
import {
9+
IXCrawlBaseConifg,
910
IFetchData,
1011
IFetchDataConfig,
1112
IFetchFile,
1213
IFetchFileConfig,
13-
IRequest,
14-
IXCrawlBaseConifg
14+
IFetchBaseConifg,
15+
IRequest
1516
} from './types'
1617

18+
/**
 * Applies the crawler-wide defaults from `baseConfig` to one fetch config.
 *
 * For every request item, `baseUrl` (when defined) is prepended to the item's
 * `url`, and the item's `timeout` falls back to the base `timeout` when the
 * item does not define one. `intervalTime` falls back to the base value in
 * the same way.
 *
 * Neither argument is modified. Copies are returned so that passing the same
 * `config` object to several calls does not prepend `baseUrl` repeatedly.
 *
 * @param baseConfig - Defaults shared by every request of the crawler.
 * @param config - The per-call fetch config.
 * @returns A shallow copy of `config` with the defaults applied;
 * `requestConifg` keeps its original shape (single item or array).
 */
function mergeConfig<T extends IFetchBaseConifg>(
  baseConfig: IXCrawlBaseConifg,
  config: T
): IFetchBaseConifg & T {
  const {
    baseUrl,
    timeout: baseTimeout,
    intervalTime: baseIntervalTime
  } = baseConfig
  const { requestConifg, intervalTime }: IFetchBaseConifg = config

  const requestItems = Array.isArray(requestConifg)
    ? requestConifg
    : [requestConifg]

  const mergedItems = requestItems.map((requestItem) => {
    // Work on a copy so the caller's request item is left untouched.
    const mergedItem = { ...requestItem }

    if (baseUrl !== undefined) {
      mergedItem.url = baseUrl + requestItem.url
    }

    if (mergedItem.timeout === undefined && baseTimeout !== undefined) {
      mergedItem.timeout = baseTimeout
    }

    return mergedItem
  })

  return {
    ...config,
    requestConifg: Array.isArray(requestConifg) ? mergedItems : mergedItems[0],
    // Only add the key when a fallback actually applies.
    ...(intervalTime === undefined && baseIntervalTime !== undefined
      ? { intervalTime: baseIntervalTime }
      : {})
  }
}
51+
1752
export default class XCrawl {
1853
private readonly baseConfig: IXCrawlBaseConifg
1954

src/request.ts

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,80 @@
1-
import http from 'node:http'
1+
import http, { Agent, RequestOptions } from 'node:http'
2+
import { Agent as httpsAgent } from 'https'
3+
import Url, { URL } from 'node:url'
4+
5+
import { isNumber, isUndefined, random, sleep } from './utils'
26

37
import {
4-
handleRequestConfig,
5-
isNumber,
6-
isUndefined,
7-
random,
8-
sleep
9-
} from './utils'
10-
11-
import { IIntervalTime, IRequest, IRequestConfig } from './types'
8+
IIntervalTime,
9+
IRequest,
10+
IRequestConfig,
11+
IAnyObject,
12+
IMapTypeEmptyObject
13+
} from './types'
14+
15+
/**
 * Appends `params` to an existing URL search string.
 *
 * @param urlSearch - The current search string, either empty or starting
 * with `?` (as produced by `URL#search`). It is kept verbatim.
 * @param params - Extra query parameters. Own enumerable keys only; keys and
 * values are percent-encoded, values are converted with `String()`.
 * @returns `urlSearch` unchanged when there is nothing to append, otherwise
 * the search string with the encoded `key=value` pairs joined by `&`.
 */
export function parseParams(urlSearch: string, params?: IAnyObject): string {
  if (!params) {
    return urlSearch
  }

  const pairs = Object.keys(params).map(
    (key) =>
      `${encodeURIComponent(key)}=${encodeURIComponent(String(params[key]))}`
  )

  if (pairs.length === 0) {
    return urlSearch
  }

  const query = pairs.join('&')

  // An empty (or bare `?`) search must not produce a leading `&`.
  return urlSearch === '' || urlSearch === '?'
    ? `?${query}`
    : `${urlSearch}&${query}`
}
29+
30+
/**
 * Builds the header map for an outgoing request.
 *
 * Starts from a default desktop-browser `User-Agent`, lets the caller's
 * `rawConfig.headers` override it, and, for a `POST` with a truthy
 * `rawConfig.data`, sets `Content-Type` and `Content-Length` for the body.
 *
 * @param rawConfig - The user-supplied request config (`headers`, `data`).
 * @param config - The request options built so far; only `method` is read.
 * @returns The merged header object.
 */
export function parseHeaders(
  rawConfig: IRequestConfig,
  config: RequestOptions & IMapTypeEmptyObject<URL>
) {
  const headers: IAnyObject = Object.assign(
    {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
    },
    rawConfig.headers ?? {}
  )

  // Body headers only apply to a POST that actually carries data.
  if (config.method !== 'POST' || !rawConfig.data) {
    return headers
  }

  headers['Content-Type'] = 'application/json'
  headers['Content-Length'] = Buffer.byteLength(rawConfig.data)

  return headers
}
48+
49+
/**
 * Converts a user-facing request config into options for Node's HTTP client.
 *
 * The URL is split into `protocol`, `hostname`, `port` and `path`; the query
 * string (URL search plus `rawConfig.params`) is stored under `search`. The
 * method defaults to `GET`, and a new `http` or `https` agent is attached
 * depending on the protocol.
 *
 * @param rawConfig - The user-supplied request config.
 * @returns The request options, including merged headers and an agent.
 * @throws TypeError if `rawConfig.url` is not a valid absolute URL.
 */
export function handleRequestConfig(
  rawConfig: IRequestConfig
): RequestOptions & IMapTypeEmptyObject<URL> {
  const { protocol, hostname, port, pathname, search } = new Url.URL(
    rawConfig.url
  )

  const config: RequestOptions & IMapTypeEmptyObject<URL> = {
    protocol,
    hostname,
    port,
    path: pathname,
    // NOTE(review): `path` does not include the query string; presumably the
    // caller combines `path` and `search` before sending — confirm.
    search: parseParams(search, rawConfig.params),

    // `toUpperCase` is locale-independent; `toLocaleUpperCase` would turn
    // `options` into `OPTİONS` on hosts with a Turkish default locale.
    method: rawConfig.method?.toUpperCase() ?? 'GET',
    headers: {},
    timeout: rawConfig.timeout
  }

  // Headers depend on the normalized method, so they are filled in afterwards.
  config.headers = parseHeaders(rawConfig, config)

  config.agent = protocol === 'http:' ? new Agent() : new httpsAgent()

  return config
}
1278

1379
export function request(config: IRequestConfig) {
1480
return new Promise<IRequest>((resolve, reject) => {

src/utils.ts

Lines changed: 0 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1,114 +1,3 @@
1-
import { Agent } from 'http'
2-
import { Agent as httpsAgent } from 'https'
3-
import Url, { URL } from 'node:url'
4-
5-
import { RequestOptions } from 'http'
6-
import {
7-
IAnyObject,
8-
IFetchBaseConifg,
9-
IMapTypeEmptyObject,
10-
IRequestConfig,
11-
IXCrawlBaseConifg
12-
} from './types'
13-
14-
/**
 * Appends `params` to an existing URL search string.
 *
 * @param urlSearch - The current search string, either empty or starting
 * with `?` (as produced by `URL#search`). It is kept verbatim.
 * @param params - Extra query parameters. Own enumerable keys only; keys and
 * values are percent-encoded, values are converted with `String()`.
 * @returns `urlSearch` unchanged when there is nothing to append, otherwise
 * the search string with the encoded `key=value` pairs joined by `&`.
 */
export function parseParams(urlSearch: string, params?: IAnyObject): string {
  if (!params) {
    return urlSearch
  }

  const pairs = Object.keys(params).map(
    (key) =>
      `${encodeURIComponent(key)}=${encodeURIComponent(String(params[key]))}`
  )

  if (pairs.length === 0) {
    return urlSearch
  }

  const query = pairs.join('&')

  // An empty (or bare `?`) search must not produce a leading `&`.
  return urlSearch === '' || urlSearch === '?'
    ? `?${query}`
    : `${urlSearch}&${query}`
}
28-
29-
/**
 * Builds the header map for an outgoing request.
 *
 * Starts from a default desktop-browser `User-Agent`, lets the caller's
 * `rawConfig.headers` override it, and, for a `POST` with a truthy
 * `rawConfig.data`, sets `Content-Type` and `Content-Length` for the body.
 *
 * @param rawConfig - The user-supplied request config (`headers`, `data`).
 * @param config - The request options built so far; only `method` is read.
 * @returns The merged header object.
 */
export function parseHeaders(
  rawConfig: IRequestConfig,
  config: RequestOptions & IMapTypeEmptyObject<URL>
) {
  const headers: IAnyObject = Object.assign(
    {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
    },
    rawConfig.headers ?? {}
  )

  // Body headers only apply to a POST that actually carries data.
  if (config.method !== 'POST' || !rawConfig.data) {
    return headers
  }

  headers['Content-Type'] = 'application/json'
  headers['Content-Length'] = Buffer.byteLength(rawConfig.data)

  return headers
}
47-
48-
/**
 * Converts a user-facing request config into options for Node's HTTP client.
 *
 * The URL is split into `protocol`, `hostname`, `port` and `path`; the query
 * string (URL search plus `rawConfig.params`) is stored under `search`. The
 * method defaults to `GET`, and a new `http` or `https` agent is attached
 * depending on the protocol.
 *
 * @param rawConfig - The user-supplied request config.
 * @returns The request options, including merged headers and an agent.
 * @throws TypeError if `rawConfig.url` is not a valid absolute URL.
 */
export function handleRequestConfig(
  rawConfig: IRequestConfig
): RequestOptions & IMapTypeEmptyObject<URL> {
  const { protocol, hostname, port, pathname, search } = new Url.URL(
    rawConfig.url
  )

  const config: RequestOptions & IMapTypeEmptyObject<URL> = {
    protocol,
    hostname,
    port,
    path: pathname,
    // NOTE(review): `path` does not include the query string; presumably the
    // caller combines `path` and `search` before sending — confirm.
    search: parseParams(search, rawConfig.params),

    // `toUpperCase` is locale-independent; `toLocaleUpperCase` would turn
    // `options` into `OPTİONS` on hosts with a Turkish default locale.
    method: rawConfig.method?.toUpperCase() ?? 'GET',
    headers: {},
    timeout: rawConfig.timeout
  }

  // Headers depend on the normalized method, so they are filled in afterwards.
  config.headers = parseHeaders(rawConfig, config)

  config.agent = protocol === 'http:' ? new Agent() : new httpsAgent()

  return config
}
77-
78-
/**
 * Applies the crawler-wide defaults from `baseConfig` to one fetch config.
 *
 * For every request item, `baseUrl` (when defined) is prepended to the item's
 * `url`, and the item's `timeout` falls back to the base `timeout` when the
 * item does not define one. `intervalTime` falls back to the base value in
 * the same way.
 *
 * Neither argument is modified. Copies are returned so that passing the same
 * `config` object to several calls does not prepend `baseUrl` repeatedly.
 *
 * @param baseConfig - Defaults shared by every request of the crawler.
 * @param config - The per-call fetch config.
 * @returns A shallow copy of `config` with the defaults applied;
 * `requestConifg` keeps its original shape (single item or array).
 */
export function mergeConfig<T extends IFetchBaseConifg>(
  baseConfig: IXCrawlBaseConifg,
  config: T
): IFetchBaseConifg & T {
  const {
    baseUrl,
    timeout: baseTimeout,
    intervalTime: baseIntervalTime
  } = baseConfig
  const { requestConifg, intervalTime }: IFetchBaseConifg = config

  const requestItems = Array.isArray(requestConifg)
    ? requestConifg
    : [requestConifg]

  const mergedItems = requestItems.map((requestItem) => {
    // Work on a copy so the caller's request item is left untouched.
    const mergedItem = { ...requestItem }

    if (baseUrl !== undefined) {
      mergedItem.url = baseUrl + requestItem.url
    }

    if (mergedItem.timeout === undefined && baseTimeout !== undefined) {
      mergedItem.timeout = baseTimeout
    }

    return mergedItem
  })

  return {
    ...config,
    requestConifg: Array.isArray(requestConifg) ? mergedItems : mergedItems[0],
    // Only add the key when a fallback actually applies.
    ...(intervalTime === undefined && baseIntervalTime !== undefined
      ? { intervalTime: baseIntervalTime }
      : {})
  }
}
111-
1121
/**
 * Returns a promise that resolves after `timeout` milliseconds.
 *
 * @param timeout - Delay passed to `setTimeout`, in milliseconds.
 */
export function sleep(timeout: number) {
  return new Promise((done) => {
    setTimeout(done, timeout)
  })
}

0 commit comments

Comments
 (0)