Skip to content

Commit a1f5bab

Browse files
committed
Build: Add . dot as word boundary in Typesense and downrank non-API pages
Override the default from https://github.com/typesense/typesense-docsearch-scraper/blob/0.6.0/scraper/src/typesense_helper.py#L58:

> 'token_separators': ['_', '-']

This should make it so that "jQuery.ajax" is tokenised as "jquery ajax" instead of "jqueryajax". Ref typesense/typesense-docsearch-scraper#40.
1 parent 21e1958 commit a1f5bab

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

docsearch.config.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"index_name": "jquery_com",
33
"start_urls": [
4-
{ "url": "https://jquery.com" },
5-
{ "url": "https://api.jquery.com", "selectors_key": "api" }
4+
{ "url": "https://api.jquery.com", "selectors_key": "api", "page_rank": 20 },
5+
{ "url": "https://jquery.com", "page_rank": 10 }
66
],
77
"stop_urls": [
88
".com/category/"
@@ -34,6 +34,9 @@
3434
"text": ".entry-content p, .entry-content li"
3535
}
3636
},
37+
"custom_settings": {
38+
"token_separators": ["_", "-", "."]
39+
},
3740
"selectors_exclude": [
3841
"header ~ article",
3942
".returns",

0 commit comments

Comments
 (0)