Skip to content

Commit 3cda4eb

Browse files
Merge pull request #122 from cipherCOM/main
feat: add exclude pattern for links in config
2 parents: 0bbbddd + 16443ed; commit 3cda4eb

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

src/config.ts

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,12 @@ export const configSchema = z.object({
2020
* @default ""
2121
*/
2222
match: z.string().or(z.array(z.string())),
23-
23+
/**
24+
* Pattern to match against for links on a page to exclude from crawling
25+
* @example "https://www.builder.io/c/docs/**"
26+
* @default ""
27+
*/
28+
exclude: z.string().or(z.array(z.string())).optional(),
2429
/**
2530
* Selector to grab the inner text from
2631
* @example ".docs-builder-container"

src/core.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,8 @@ export async function crawl(config: Config) {
9292
await enqueueLinks({
9393
globs:
9494
typeof config.match === "string" ? [config.match] : config.match,
95+
exclude:
96+
typeof config.exclude === "string" ? [config.exclude] : config.exclude ?? [],
9597
});
9698
},
9799
// Comment this option to scrape the full website.

0 commit comments

Comments (0)