File tree Expand file tree Collapse file tree 4 files changed +51
-24
lines changed Expand file tree Collapse file tree 4 files changed +51
-24
lines changed Original file line number Diff line number Diff line change @@ -11,8 +11,9 @@ import {
11
11
IFetchDataConfig ,
12
12
IFetchFileConfig ,
13
13
IFetchBaseConifg ,
14
- IFileInfo ,
15
14
IFetchCommon ,
15
+ IFileInfo ,
16
+ IFetchHTML ,
16
17
IRequestResItem ,
17
18
IRequestConfig ,
18
19
IIntervalTime
@@ -77,7 +78,7 @@ export default class XCrawl {
77
78
return requestRes
78
79
}
79
80
80
- async fetchHTML ( config : string | IFetchHTMLConfig ) : Promise < JSDOM > {
81
+ async fetchHTML ( config : IFetchHTMLConfig ) : Promise < IFetchHTML > {
81
82
const rawRequestConifg : IFetchHTMLConfig = isString ( config )
82
83
? { url : config }
83
84
: config
@@ -86,11 +87,18 @@ export default class XCrawl {
86
87
requestConifg : rawRequestConifg
87
88
} )
88
89
89
- const requestResItem = await request ( requestConifg )
90
+ const requestRes = await request ( requestConifg )
91
+ const rawData = requestRes . data . toString ( )
90
92
91
- const dom = new JSDOM ( requestResItem . data )
93
+ const res : IFetchHTML = {
94
+ ...requestRes ,
95
+ data : {
96
+ raw : rawData ,
97
+ jsdom : new JSDOM ( rawData )
98
+ }
99
+ }
92
100
93
- return dom
101
+ return res
94
102
}
95
103
96
104
async fetchData < T = any > ( config : IFetchDataConfig ) : Promise < IFetchCommon < T > > {
Original file line number Diff line number Diff line change 1
1
import { IncomingHttpHeaders } from 'node:http'
2
+ import { JSDOM } from 'jsdom'
2
3
3
4
export interface IAnyObject extends Object {
4
5
[ key : string | number | symbol ] : any
@@ -67,7 +68,7 @@ export interface IFetchBaseConifg {
67
68
intervalTime ?: IIntervalTime
68
69
}
69
70
70
- export interface IFetchHTMLConfig extends IRequestConfig { }
71
+ export type IFetchHTMLConfig = string | IRequestConfig
71
72
72
73
export interface IFetchDataConfig extends IFetchBaseConifg { }
73
74
@@ -77,16 +78,25 @@ export interface IFetchFileConfig extends IFetchBaseConifg {
77
78
}
78
79
}
79
80
81
+ export type IFetchCommon < T > = {
82
+ id : number
83
+ statusCode : number | undefined
84
+ headers : IncomingHttpHeaders
85
+ data : T
86
+ } [ ]
87
+
80
88
export interface IFileInfo {
81
89
fileName : string
82
90
mimeType : string
83
91
size : number
84
92
filePath : string
85
93
}
86
94
87
- export type IFetchCommon < T > = {
88
- id : number
95
+ export interface IFetchHTML {
89
96
statusCode : number | undefined
90
97
headers : IncomingHttpHeaders
91
- data : T
92
- } [ ]
98
+ data : {
99
+ raw : string
100
+ jsdom : JSDOM
101
+ }
102
+ }
Original file line number Diff line number Diff line change @@ -10,19 +10,19 @@ const testXCrawl = new XCrawl({
10
10
mode : 'sync'
11
11
} )
12
12
13
- testXCrawl
14
- . fetchData ( {
15
- requestConifg : [
16
- { url : 'http://localhost:3001/home' } ,
17
- { url : 'http://localhost:9001/api/home/wonderfulplace' } ,
18
- { url : 'http://localhost:9001/api/home/goodprice' } ,
19
- { url : 'http://localhost:3001/home' } ,
20
- { url : 'http://localhost:9001/ai/home/goodprice' }
21
- ]
22
- } )
23
- . then ( ( res ) => {
24
- console . log ( res )
25
- } )
13
+ // testXCrawl
14
+ // .fetchData({
15
+ // requestConifg: [
16
+ // { url: 'http://localhost:3001/home' },
17
+ // { url: 'http://localhost:9001/api/home/wonderfulplace' },
18
+ // { url: 'http://localhost:9001/api/home/goodprice' },
19
+ // { url: 'http://localhost:3001/home' },
20
+ // { url: 'http://localhost:9001/ai/home/goodprice' }
21
+ // ]
22
+ // })
23
+ // .then((res) => {
24
+ // console.log(res)
25
+ // })
26
26
27
27
// testXCrawl.fetchHTML({ url: 'https://www.bilibili.com/' }).then((jsdom) => {
28
28
// const document = jsdom.window.document
@@ -52,3 +52,12 @@ testXCrawl
52
52
// console.log(res)
53
53
// })
54
54
// })
55
+
56
+ testXCrawl . fetchHTML ( 'https://cn.bing.com' ) . then ( ( res ) => {
57
+ const { jsdom } = res . data
58
+ } )
59
+
60
+ testXCrawl . fetchHTML ( 'https://docs.github.com/zh/get-started' ) . then ( ( res ) => {
61
+ const { jsdom } = res . data
62
+ console . log ( jsdom . window . document . querySelector ( 'title' ) ?. textContent )
63
+ } )
You can’t perform that action at this time.
0 commit comments