Skip to content

Commit 7c92774

Browse files
committed
fetchHTML API exposes more content
1 parent 82030e9 commit 7c92774

File tree

4 files changed: 51 additions and 24 deletions (+51 -24 lines changed)

src/index.ts

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,9 @@ import {
1111
IFetchDataConfig,
1212
IFetchFileConfig,
1313
IFetchBaseConifg,
14-
IFileInfo,
1514
IFetchCommon,
15+
IFileInfo,
16+
IFetchHTML,
1617
IRequestResItem,
1718
IRequestConfig,
1819
IIntervalTime
@@ -77,7 +78,7 @@ export default class XCrawl {
7778
return requestRes
7879
}
7980

80-
async fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM> {
81+
async fetchHTML(config: IFetchHTMLConfig): Promise<IFetchHTML> {
8182
const rawRequestConifg: IFetchHTMLConfig = isString(config)
8283
? { url: config }
8384
: config
@@ -86,11 +87,18 @@ export default class XCrawl {
8687
requestConifg: rawRequestConifg
8788
})
8889

89-
const requestResItem = await request(requestConifg)
90+
const requestRes = await request(requestConifg)
91+
const rawData = requestRes.data.toString()
9092

91-
const dom = new JSDOM(requestResItem.data)
93+
const res: IFetchHTML = {
94+
...requestRes,
95+
data: {
96+
raw: rawData,
97+
jsdom: new JSDOM(rawData)
98+
}
99+
}
92100

93-
return dom
101+
return res
94102
}
95103

96104
async fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>> {

src/types.ts

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import { IncomingHttpHeaders } from 'node:http'
2+
import { JSDOM } from 'jsdom'
23

34
export interface IAnyObject extends Object {
45
[key: string | number | symbol]: any
@@ -67,7 +68,7 @@ export interface IFetchBaseConifg {
6768
intervalTime?: IIntervalTime
6869
}
6970

70-
export interface IFetchHTMLConfig extends IRequestConfig {}
71+
export type IFetchHTMLConfig = string | IRequestConfig
7172

7273
export interface IFetchDataConfig extends IFetchBaseConifg {}
7374

@@ -77,16 +78,25 @@ export interface IFetchFileConfig extends IFetchBaseConifg {
7778
}
7879
}
7980

81+
export type IFetchCommon<T> = {
82+
id: number
83+
statusCode: number | undefined
84+
headers: IncomingHttpHeaders
85+
data: T
86+
}[]
87+
8088
export interface IFileInfo {
8189
fileName: string
8290
mimeType: string
8391
size: number
8492
filePath: string
8593
}
8694

87-
export type IFetchCommon<T> = {
88-
id: number
95+
export interface IFetchHTML {
8996
statusCode: number | undefined
9097
headers: IncomingHttpHeaders
91-
data: T
92-
}[]
98+
data: {
99+
raw: string
100+
jsdom: JSDOM
101+
}
102+
}

test/start/index.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/start/index.ts

Lines changed: 22 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -10,19 +10,19 @@ const testXCrawl = new XCrawl({
1010
mode: 'sync'
1111
})
1212

13-
testXCrawl
14-
.fetchData({
15-
requestConifg: [
16-
{ url: 'http://localhost:3001/home' },
17-
{ url: 'http://localhost:9001/api/home/wonderfulplace' },
18-
{ url: 'http://localhost:9001/api/home/goodprice' },
19-
{ url: 'http://localhost:3001/home' },
20-
{ url: 'http://localhost:9001/ai/home/goodprice' }
21-
]
22-
})
23-
.then((res) => {
24-
console.log(res)
25-
})
13+
// testXCrawl
14+
// .fetchData({
15+
// requestConifg: [
16+
// { url: 'http://localhost:3001/home' },
17+
// { url: 'http://localhost:9001/api/home/wonderfulplace' },
18+
// { url: 'http://localhost:9001/api/home/goodprice' },
19+
// { url: 'http://localhost:3001/home' },
20+
// { url: 'http://localhost:9001/ai/home/goodprice' }
21+
// ]
22+
// })
23+
// .then((res) => {
24+
// console.log(res)
25+
// })
2626

2727
// testXCrawl.fetchHTML({ url: 'https://www.bilibili.com/' }).then((jsdom) => {
2828
// const document = jsdom.window.document
@@ -52,3 +52,12 @@ testXCrawl
5252
// console.log(res)
5353
// })
5454
// })
55+
56+
testXCrawl.fetchHTML('https://cn.bing.com').then((res) => {
57+
const { jsdom } = res.data
58+
})
59+
60+
testXCrawl.fetchHTML('https://docs.github.com/zh/get-started').then((res) => {
61+
const { jsdom } = res.data
62+
console.log(jsdom.window.document.querySelector('title')?.textContent)
63+
})

0 commit comments

Comments
 (0)