Skip to content

Commit 550f1e6

Browse files
authored
Merge pull request #54 from iperzic/type-validation
Config validation
2 parents: e67af23 + 5fdf79b — commit 550f1e6

File tree

5 files changed

+48
-17
lines changed

5 files changed

+48
-17
lines changed

package-lock.json

Lines changed: 10 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@
1313
"glob": "^10.3.10",
1414
"inquirer": "^9.2.12",
1515
"playwright": "*",
16-
"prettier": "^3.1.0"
16+
"prettier": "^3.1.0",
17+
"zod": "^3.22.4"
1718
},
1819
"devDependencies": {
1920
"@apify/tsconfig": "^0.1.0",

src/config.ts

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,41 +1,57 @@
1+
import { z } from 'zod';
2+
13
import type { Page } from "playwright";
24

3-
export type Config = {
5+
const Page: z.ZodType<Page> = z.any();
6+
7+
export const configSchema = z.object({
48
/**
59
* URL to start the crawl
610
* @example "https://www.builder.io/c/docs/developers"
711
* @default ""
812
*/
9-
url: string;
13+
url: z.string(),
1014
/**
1115
* Pattern to match against for links on a page to subsequently crawl
1216
* @example "https://www.builder.io/c/docs/**"
1317
* @default ""
1418
*/
15-
match: string | string[];
19+
match: z.string().or(z.array(z.string())),
20+
1621
/**
1722
* Selector to grab the inner text from
1823
* @example ".docs-builder-container"
1924
* @default ""
2025
*/
21-
selector?: string;
26+
selector: z.string().optional(),
2227
/**
2328
* Don't crawl more than this many pages
2429
* @default 50
2530
*/
26-
maxPagesToCrawl: number;
31+
maxPagesToCrawl: z.number().int().positive(),
2732
/**
2833
* File name for the finished data
2934
* @default "output.json"
3035
*/
31-
outputFileName: string;
36+
outputFileName: z.string(),
3237
/** Optional cookie to be set. E.g. for Cookie Consent */
33-
cookie?: { name: string; value: string };
38+
cookie: z.object({
39+
name: z.string(),
40+
value: z.string(),
41+
}).optional(),
3442
/** Optional function to run for each page found */
35-
onVisitPage?: (options: {
36-
page: Page;
37-
pushData: (data: any) => Promise<void>;
38-
}) => Promise<void>;
43+
onVisitPage: z.function()
44+
.args(z.object({
45+
page: Page,
46+
pushData: z.function()
47+
.args(z.any())
48+
.returns(z.promise(z.void()))
49+
}))
50+
.returns(z.promise(z.void()))
51+
.optional(),
3952
/** Optional timeout for waiting for a selector to appear */
40-
waitForSelectorTimeout?: number;
41-
};
53+
waitForSelectorTimeout: z.number().int().nonnegative().optional(),
54+
});
55+
56+
export type Config = z.infer<typeof configSchema>;
57+

src/core.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import { PlaywrightCrawler } from "crawlee";
33
import { readFile, writeFile } from "fs/promises";
44
import { glob } from "glob";
5-
import { Config } from "./config.js";
5+
import {Config, configSchema} from "./config.js";
66
import { Page } from "playwright";
77

88
let pageCounter = 0;
@@ -46,6 +46,8 @@ export async function waitForXPath(page: Page, xpath: string, timeout: number) {
4646
}
4747

4848
export async function crawl(config: Config) {
49+
configSchema.parse(config);
50+
4951
if (process.env.NO_CRAWL !== "true") {
5052
// PlaywrightCrawler crawls the web using a headless
5153
// browser controlled by the Playwright library.
@@ -111,6 +113,8 @@ export async function crawl(config: Config) {
111113
}
112114

113115
export async function write(config: Config) {
116+
configSchema.parse(config);
117+
114118
const jsonFiles = await glob("storage/datasets/default/*.json", {
115119
absolute: true,
116120
});

tsconfig.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,8 @@
77
"resolveJsonModule": true,
88
"noUnusedLocals": false,
99
"skipLibCheck": true,
10-
"lib": ["DOM"]
10+
"lib": ["DOM"],
11+
"strict": true,
1112
},
1213
"include": ["./src/**/*", "config.ts"]
1314
}

0 commit comments

Comments
 (0)