Skip to content

Commit 584c3d3

Browse files
committed
fixes
1 parent 61fbb62 commit 584c3d3

File tree

2 files changed

+19
-17
lines changed

2 files changed

+19
-17
lines changed

src/cli.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env node
22

33
import { program } from "commander";
4-
import { Config } from "../config.js";
4+
import { Config } from "./config.js";
55
import { crawl, write } from "./core.js";
66
import { createRequire } from "node:module";
77
import inquirer from "inquirer";

src/core.ts

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
import { PlaywrightCrawler } from "crawlee";
33
import { readFile, writeFile } from "fs/promises";
44
import { glob } from "glob";
5-
import { Config } from "../config.js";
5+
import { Config } from "./config.js";
66
import { Page } from "playwright";
77

88
let pageCounter = 0;
99

10-
export function getPageHtml(page: Page, selector: string) {
10+
export function getPageHtml(page: Page, selector = "body") {
1111
return page.evaluate((selector) => {
1212
// Check if the selector is an XPath
1313
if (selector.startsWith("/")) {
@@ -16,7 +16,7 @@ export function getPageHtml(page: Page, selector: string) {
1616
document,
1717
null,
1818
XPathResult.ANY_TYPE,
19-
null,
19+
null
2020
);
2121
let result = elements.iterateNext();
2222
return result ? result.textContent || "" : "";
@@ -36,12 +36,12 @@ export async function waitForXPath(page: Page, xpath: string, timeout: number) {
3636
document,
3737
null,
3838
XPathResult.ANY_TYPE,
39-
null,
39+
null
4040
);
4141
return elements.iterateNext() !== null;
4242
},
4343
xpath,
44-
{ timeout },
44+
{ timeout }
4545
);
4646
}
4747

@@ -65,20 +65,22 @@ export async function crawl(config: Config) {
6565
const title = await page.title();
6666
pageCounter++;
6767
log.info(
68-
`Crawling: Page ${pageCounter} / ${config.maxPagesToCrawl} - URL: ${request.loadedUrl}...`,
68+
`Crawling: Page ${pageCounter} / ${config.maxPagesToCrawl} - URL: ${request.loadedUrl}...`
6969
);
7070

7171
// Use custom handling for XPath selector
72-
if (config.selector.startsWith("/")) {
73-
await waitForXPath(
74-
page,
75-
config.selector,
76-
config.waitForSelectorTimeout ?? 1000,
77-
);
78-
} else {
79-
await page.waitForSelector(config.selector, {
80-
timeout: config.waitForSelectorTimeout ?? 1000,
81-
});
72+
if (config.selector) {
73+
if (config.selector.startsWith("/")) {
74+
await waitForXPath(
75+
page,
76+
config.selector,
77+
config.waitForSelectorTimeout ?? 1000
78+
);
79+
} else {
80+
await page.waitForSelector(config.selector, {
81+
timeout: config.waitForSelectorTimeout ?? 1000,
82+
});
83+
}
8284
}
8385

8486
const html = await getPageHtml(page, config.selector);

0 commit comments

Comments
 (0)