2
2
import { PlaywrightCrawler } from "crawlee" ;
3
3
import { readFile , writeFile } from "fs/promises" ;
4
4
import { glob } from "glob" ;
5
- import { Config } from ".. /config.js" ;
5
+ import { Config } from "./config.js" ;
6
6
import { Page } from "playwright" ;
7
7
8
8
let pageCounter = 0 ;
9
9
10
- export function getPageHtml ( page : Page , selector : string ) {
10
+ export function getPageHtml ( page : Page , selector = "body" ) {
11
11
return page . evaluate ( ( selector ) => {
12
12
// Check if the selector is an XPath
13
13
if ( selector . startsWith ( "/" ) ) {
@@ -16,7 +16,7 @@ export function getPageHtml(page: Page, selector: string) {
16
16
document ,
17
17
null ,
18
18
XPathResult . ANY_TYPE ,
19
- null ,
19
+ null
20
20
) ;
21
21
let result = elements . iterateNext ( ) ;
22
22
return result ? result . textContent || "" : "" ;
@@ -36,12 +36,12 @@ export async function waitForXPath(page: Page, xpath: string, timeout: number) {
36
36
document ,
37
37
null ,
38
38
XPathResult . ANY_TYPE ,
39
- null ,
39
+ null
40
40
) ;
41
41
return elements . iterateNext ( ) !== null ;
42
42
} ,
43
43
xpath ,
44
- { timeout } ,
44
+ { timeout }
45
45
) ;
46
46
}
47
47
@@ -65,20 +65,22 @@ export async function crawl(config: Config) {
65
65
const title = await page . title ( ) ;
66
66
pageCounter ++ ;
67
67
log . info (
68
- `Crawling: Page ${ pageCounter } / ${ config . maxPagesToCrawl } - URL: ${ request . loadedUrl } ...` ,
68
+ `Crawling: Page ${ pageCounter } / ${ config . maxPagesToCrawl } - URL: ${ request . loadedUrl } ...`
69
69
) ;
70
70
71
71
// Use custom handling for XPath selector
72
- if ( config . selector . startsWith ( "/" ) ) {
73
- await waitForXPath (
74
- page ,
75
- config . selector ,
76
- config . waitForSelectorTimeout ?? 1000 ,
77
- ) ;
78
- } else {
79
- await page . waitForSelector ( config . selector , {
80
- timeout : config . waitForSelectorTimeout ?? 1000 ,
81
- } ) ;
72
+ if ( config . selector ) {
73
+ if ( config . selector . startsWith ( "/" ) ) {
74
+ await waitForXPath (
75
+ page ,
76
+ config . selector ,
77
+ config . waitForSelectorTimeout ?? 1000
78
+ ) ;
79
+ } else {
80
+ await page . waitForSelector ( config . selector , {
81
+ timeout : config . waitForSelectorTimeout ?? 1000 ,
82
+ } ) ;
83
+ }
82
84
}
83
85
84
86
const html = await getPageHtml ( page , config . selector ) ;
0 commit comments