Skip to content

Commit 1fe38d8

Browse files
committed
Add original scraper config
1 parent 852a2aa commit 1fe38d8

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed
Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
{
2+
"index_name": "typesense_docs",
3+
"allowed_domains": ["172.17.0.2","docs.servicestack.net"],
4+
"start_urls": [
5+
{
6+
"url": "https://docs.servicestack.net/"
7+
},
8+
{
9+
"url": "https://docs.servicestack.net/redis/"
10+
},
11+
{
12+
"url": "https://docs.servicestack.net/ormlite/"
13+
},
14+
{
15+
"url": "https://docs.servicestack.net/vue/"
16+
},
17+
{
18+
"url": "https://docs.servicestack.net/locode/"
19+
}
20+
],
21+
"stop_urls": ["https://docs.servicestack.net/release*"],
22+
"selectors": {
23+
"default": {
24+
"lvl0": "h1",
25+
"lvl1": ".content h2",
26+
"lvl2": ".content h3",
27+
"lvl3": ".content h4",
28+
"lvl4": ".content h5",
29+
"text": ".content p, .content ul li, .content table tbody tr"
30+
}
31+
},
32+
"scrape_start_urls": true,
33+
"strip_chars": " .,;:#"
34+
}

0 commit comments

Comments
 (0)