Skip to content

Commit c470107

Browse files
committed
Pending changes exported from your codespace
1 parent 40464cb commit c470107

File tree

8 files changed

+105
-43
lines changed

8 files changed

+105
-43
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# crawler
1+
# x-crawl
22

3-
Lightweight nodejs crawler library that can help you crawl data and files.
3+
XCrawl is a Node.js crawler library: you provide a configuration, and it crawls data or files in batches for you.
44

5-
轻量级的 nodejs 爬虫库,可以帮助你抓取数据和文件
5+
XCrawl 是 Nodejs 爬虫库,提供配置即可帮你批量抓取数据或文件。

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
{
22
"private": true,
3-
"name": "crawler",
3+
"name": "x-crawler",
44
"version": "0.0.1",
55
"author": "coderhxl",
6-
"description": "爬虫库",
6+
"description": "XCrawl is a Node.js crawler library: you provide a configuration, and it crawls data or files in batches for you.",
77
"license": "MIT",
88
"main": "src/index.ts",
99
"scripts": {

publish/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
# crawler
1+
# x-crawl
2+
3+
XCrawl is a Node.js crawler library: you provide a configuration, and it crawls data or files in batches for you.
4+
5+
XCrawl 是 Nodejs 爬虫库,提供配置即可帮你批量抓取数据或文件。

publish/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
2-
"name": "crawler",
2+
"name": "x-crawler",
33
"version": "0.0.1",
44
"author": "coderhxl",
5-
"description": "爬虫库",
5+
"description": "XCrawl is a Node.js crawler library: you provide a configuration, and it crawls data or files in batches for you.",
66
"license": "MIT",
77
"main": "dist/index.js",
88
"types": "",

src/index.ts

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1-
import { fetch, fetchFile } from './service'
1+
import { fetch } from './service'
22

3-
const crawler = {
4-
fetch,
5-
fetchFile
6-
}
3+
import { IFetchConfig, XCrawlConifg } from './types'
4+
import { loaderBaseConfig } from './utils'
5+
6+
export default class XCrawl {
7+
baseConfig: XCrawlConifg
78

8-
export { fetch, fetchFile }
9+
constructor(XCrawlConfig: XCrawlConifg) {
10+
this.baseConfig = XCrawlConfig
11+
}
912

10-
export default crawler
13+
async fetch<T = any>(config: IFetchConfig): Promise<T> {
14+
const loaderRes = loaderBaseConfig(this.baseConfig, config)
15+
16+
return fetch(loaderRes)
17+
}
18+
}

src/service.ts

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ import {
1010
IRequestConfig
1111
} from './types'
1212

13-
function request(config: IRequestConfig) {
13+
export function request(config: IRequestConfig) {
1414
return new Promise<IRequest>((resolve, reject) => {
15-
const data = (config.data = JSON.stringify(config.data ?? ''))
15+
const data = (config.data = config.data
16+
? JSON.stringify(config.data ?? '')
17+
: config.data)
1618
const handleConfigRes = handleConfig(config)
1719

1820
const req = https.request(handleConfigRes, (res) => {
@@ -54,30 +56,31 @@ function request(config: IRequestConfig) {
5456

5557
export async function fetch<T = any>(config: IFetchConfig): Promise<T> {
5658
const { requestConifg, intervalTime } = config
59+
const isRequestConifgArr = Array.isArray(requestConifg)
60+
const requestConifgArr = isRequestConifgArr ? requestConifg : [requestConifg]
5761

58-
let res
59-
if (Array.isArray(requestConifg)) {
60-
res = []
62+
const total = requestConifgArr.length
63+
let currentCount = 0
6164

62-
for (const item of requestConifg) {
63-
const requestRes = await request(item)
64-
res.push(JSON.parse(requestRes.data.toString()))
65+
const container = []
6566

66-
if (typeof intervalTime !== 'undefined') {
67-
const timeout =
68-
typeof intervalTime === 'number'
69-
? intervalTime
70-
: random(intervalTime.max, intervalTime.min)
67+
for (const item of requestConifgArr) {
68+
currentCount++
7169

72-
await sleep(timeout)
73-
}
70+
const requestRes = await request(item)
71+
container.push(JSON.parse(requestRes.data.toString()))
72+
73+
if (typeof intervalTime !== 'undefined' && currentCount !== total) {
74+
const timeout =
75+
typeof intervalTime === 'number'
76+
? intervalTime
77+
: random(intervalTime.max, intervalTime.min)
78+
79+
await sleep(timeout)
7480
}
75-
} else {
76-
const requestRes = await request(requestConifg)
77-
res = JSON.parse(requestRes.data.toString())
7881
}
7982

80-
return res
83+
return isRequestConifgArr ? container : container[0]
8184
}
8285

8386
export async function fetchFile(config: IFetchFileConfig) {

src/types.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { IncomingHttpHeaders, OutgoingHttpHeaders } from 'node:http'
2+
import { type } from 'node:os'
23

34
export interface IAnyObject extends Object {
45
[key: string | number | symbol]: any
@@ -48,14 +49,17 @@ export interface IRequestConfig {
4849
timeout?: number
4950
}
5051

52+
export type IIntervalTime = number | { max: number; min?: number }
53+
5154
export interface IFetchBaseConifg {
5255
requestConifg: IRequestConfig | IRequestConfig[]
53-
intervalTime?:
54-
| number
55-
| {
56-
max: number
57-
min?: number
58-
}
56+
intervalTime?: IIntervalTime
57+
}
58+
59+
export interface XCrawlConifg {
60+
baseUrl?: string
61+
timeout?: number
62+
intervalTime?: IIntervalTime
5963
}
6064

6165
export interface IFetchConfig extends IFetchBaseConifg {}

src/utils.ts

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
import { RequestOptions } from 'http'
2-
32
import Url, { URL } from 'node:url'
4-
import { IAnyObject, IMapTypeEmptyObject, IRequestConfig } from './types'
3+
4+
import {
5+
IAnyObject,
6+
IFetchConfig,
7+
IMapTypeEmptyObject,
8+
IRequestConfig,
9+
XCrawlConifg
10+
} from './types'
511

612
export function parseParams(urlSearch: string, params?: IAnyObject): string {
713
let res = urlSearch ? `${urlSearch}` : '?'
@@ -49,14 +55,51 @@ export function handleConfig(
4955
search: parseParams(search, rawConfig.params),
5056

5157
method: rawConfig.method.toLocaleUpperCase(),
52-
headers: {}
58+
headers: {},
59+
timeout: rawConfig.timeout
5360
}
5461

5562
config.headers = parseHeaders(rawConfig, config)
5663

5764
return config
5865
}
5966

67+
export function loaderBaseConfig(
68+
baseConfig: XCrawlConifg,
69+
config: IFetchConfig
70+
) {
71+
const {
72+
baseUrl,
73+
timeout: baseTimeout,
74+
intervalTime: baseIntervalTime
75+
} = baseConfig
76+
const { requestConifg, intervalTime } = config
77+
78+
const requestConifgArr = Array.isArray(requestConifg)
79+
? [...requestConifg]
80+
: [requestConifg]
81+
82+
for (const requestItem of requestConifgArr) {
83+
const { url, timeout } = requestItem
84+
85+
requestItem.url = baseUrl + url
86+
87+
if (isUndefined(timeout) && !isUndefined(baseTimeout)) {
88+
requestItem.timeout = baseTimeout
89+
}
90+
}
91+
92+
if (isUndefined(intervalTime) && !isUndefined(baseIntervalTime)) {
93+
config.intervalTime = baseIntervalTime
94+
}
95+
96+
return config
97+
}
98+
99+
export function isUndefined(value: any) {
100+
return typeof value === 'undefined'
101+
}
102+
60103
export function sleep(timeout: number) {
61104
return new Promise((resolve) => setTimeout(resolve, timeout))
62105
}

0 commit comments

Comments
 (0)