Skip to content

Commit 3b5fe70

Browse files
authored
Merge pull request #518 from easyops-cn/steve/stop-word-filter
feat: support setting a language list to remove their default stop word filter
2 parents 27b888a + e7ce7ac commit 3b5fe70

File tree

13 files changed

+57
-27
lines changed

13 files changed

+57
-27
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ module.exports = {
8282
| hashed | boolean \| `"filename"` \| `"query"` | `false` | Whether to add a hashed query when fetching index (based on the content hash of all indexed `*.md` in `docsDir` and `blogDir` if applicable). Setting to `"filename"` will save hash in filename instead of query. |
8383
| docsDir | string \| string[] | `"docs"` | The dir(s) of docs to get the content hash, it's relative to the dir of your project. |
8484
| blogDir | string \| string[] | `"blog"` | Just like the `docsDir` but applied to blog. |
85-
| removeDefaultStopWordFilter | boolean | `false` | Sometimes people (E.g., us) want to keep the English stop words as indexed, since they maybe are relevant in programming docs. |
85+
| removeDefaultStopWordFilter | boolean \| string[] | `[]` | Sometimes people (e.g., us) want to keep the English stop words indexed, since they may be relevant in programming docs. Set a list of languages to remove the default stop word filter for each of them; `true` is equivalent to `["en"]`. |
8686
| removeDefaultStemmer | boolean | `false` | Enable this if you want to be able to search for any partial word at the cost of search performance. |
8787
| highlightSearchTermsOnTargetPage | boolean | `false` | Highlight search terms on target page. |
8888
| searchResultLimits | number | `8` | Limit the search results. |

docusaurus-search-local/src/client/utils/__mocks__/proxiedGeneratedConstants.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
export let language = ["en", "zh"];
2-
export let removeDefaultStopWordFilter = false;
2+
export let removeDefaultStopWordFilter: string[] = [];
33
export const searchIndexUrl = "search-index{dir}.json?_=abc";
44
export const searchResultLimits = 8;
55
export let fuzzyMatchingDistance = 0;
@@ -8,7 +8,7 @@ export function __setLanguage(value: string[]): void {
88
language = value;
99
}
1010

11-
export function __setRemoveDefaultStopWordFilter(value: boolean): void {
11+
export function __setRemoveDefaultStopWordFilter(value: string[]): void {
1212
removeDefaultStopWordFilter = value;
1313
}
1414

docusaurus-search-local/src/client/utils/smartQueries.spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ interface TestQuery {
2828
describe("smartQueries", () => {
2929
beforeEach(() => {
3030
__setLanguage(["en", "zh"]);
31-
__setRemoveDefaultStopWordFilter(false);
31+
__setRemoveDefaultStopWordFilter([]);
3232
});
3333

3434
test.each<[string[], TestQuery[]]>([
@@ -236,7 +236,7 @@ describe("smartQueries", () => {
236236
describe("smartQueries with no stop words filter", () => {
237237
beforeEach(() => {
238238
__setLanguage(["en", "fake"]);
239-
__setRemoveDefaultStopWordFilter(true);
239+
__setRemoveDefaultStopWordFilter(["en"]);
240240
});
241241

242242
test.each<[string[], TestQuery[]]>([

docusaurus-search-local/src/client/utils/smartQueries.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,15 @@ export function smartQueries(
4747
const stopWordPipelines: lunr.PipelineFunction[] = [];
4848
for (const lang of language) {
4949
if (lang === "en") {
50-
if (!removeDefaultStopWordFilter) {
50+
if (!removeDefaultStopWordFilter.includes(lang)) {
5151
stopWordPipelines.unshift(lunr.stopWordFilter);
5252
}
5353
} else {
5454
const lunrLang = (lunr as any)[lang] as typeof lunr;
55-
if (lunrLang.stopWordFilter) {
55+
if (
56+
lunrLang.stopWordFilter &&
57+
!removeDefaultStopWordFilter.includes(lang)
58+
) {
5659
stopWordPipelines.unshift(lunrLang.stopWordFilter);
5760
}
5861
}

docusaurus-search-local/src/declarations.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ declare module "*/generated-constants.js" {
3333
export const fuzzyMatchingDistance: number;
3434
// These below are for mocking only.
3535
export const __setLanguage: (value: string[]) => void;
36-
export const __setRemoveDefaultStopWordFilter: (value: boolean) => void;
36+
export const __setRemoveDefaultStopWordFilter: (value: string[]) => void;
3737
}
3838

3939
declare module "@docusaurus/Head";

docusaurus-search-local/src/index.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ export interface PluginOptions {
7474
/**
7575
* Sometimes people (E.g., us) want to keep the English stop words as indexed, since they
7676
* may be relevant in programming docs.
77+
*
78+
* Set a list of languages to remove the default stop word filter for each of them; `true` is equivalent to `["en"]`.
7779
*/
78-
removeDefaultStopWordFilter?: boolean;
80+
removeDefaultStopWordFilter?: boolean | string[];
7981

8082
/**
8183
* Enable this if you want to be able to search for any partial word at the cost of search performance.

docusaurus-search-local/src/server/utils/buildIndex.spec.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ describe("buildIndex", () => {
4141
allDocuments as SearchDocument[][],
4242
{
4343
language: ["en"],
44-
removeDefaultStopWordFilter: false,
44+
removeDefaultStopWordFilter: [] as string[],
4545
removeDefaultStemmer: false,
4646
} as ProcessedPluginOptions
4747
);
@@ -69,7 +69,7 @@ describe("buildIndex", () => {
6969
allDocuments as SearchDocument[][],
7070
{
7171
language: ["zh"],
72-
removeDefaultStopWordFilter: false,
72+
removeDefaultStopWordFilter: [] as string[],
7373
removeDefaultStemmer: false,
7474
} as ProcessedPluginOptions
7575
);
@@ -88,7 +88,7 @@ describe("buildIndex", () => {
8888
allDocuments as SearchDocument[][],
8989
{
9090
language: ["es"],
91-
removeDefaultStopWordFilter: false,
91+
removeDefaultStopWordFilter: [] as string[],
9292
removeDefaultStemmer: false,
9393
} as ProcessedPluginOptions
9494
);
@@ -106,7 +106,7 @@ describe("buildIndex", () => {
106106
allDocuments as SearchDocument[][],
107107
{
108108
language: ["ja"],
109-
removeDefaultStopWordFilter: false,
109+
removeDefaultStopWordFilter: [] as string[],
110110
removeDefaultStemmer: false,
111111
} as ProcessedPluginOptions
112112
);
@@ -138,7 +138,7 @@ describe("buildIndex", () => {
138138
allDocuments as SearchDocument[][],
139139
{
140140
language: ["en", "zh"],
141-
removeDefaultStopWordFilter: true,
141+
removeDefaultStopWordFilter: ["en"],
142142
removeDefaultStemmer: false,
143143
} as ProcessedPluginOptions
144144
);

docusaurus-search-local/src/server/utils/buildIndex.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,19 @@ export function buildIndex(
6060
this.use(plugin);
6161
}
6262

63-
if (removeDefaultStopWordFilter) {
64-
// Sometimes we need no English stop words,
65-
// since they are almost all programming code.
66-
this.pipeline.remove(lunr.stopWordFilter);
63+
// Sometimes we need no English stop words,
64+
// since they are almost all programming code.
65+
for (const lang of language) {
66+
if (removeDefaultStopWordFilter.includes(lang)) {
67+
if (lang === "en") {
68+
this.pipeline.remove(lunr.stopWordFilter);
69+
} else {
70+
const stopWordFilter = (lunr as any)[lang]?.stopWordFilter;
71+
if (stopWordFilter) {
72+
this.pipeline.remove(stopWordFilter);
73+
}
74+
}
75+
}
6776
}
6877

6978
if (removeDefaultStemmer) {

docusaurus-search-local/src/server/utils/processPluginOptions.spec.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ describe("processPluginOptions", () => {
2525
ignoreFiles: ["test"],
2626
ignoreCssSelectors: [],
2727
searchBarPosition: "right",
28+
removeDefaultStopWordFilter: [],
2829
},
2930
],
3031
[
@@ -37,6 +38,7 @@ describe("processPluginOptions", () => {
3738
ignoreFiles: [/__meta__$/],
3839
ignoreCssSelectors: [],
3940
searchBarPosition: "left",
41+
removeDefaultStopWordFilter: true,
4042
},
4143
{
4244
docsRouteBasePath: ["docs"],
@@ -47,6 +49,7 @@ describe("processPluginOptions", () => {
4749
ignoreFiles: [/__meta__$/],
4850
ignoreCssSelectors: [],
4951
searchBarPosition: "left",
52+
removeDefaultStopWordFilter: ["en"],
5053
},
5154
],
5255
])("processPluginOptions(...) should work", (options, config) => {
@@ -72,6 +75,7 @@ describe("processPluginOptions", () => {
7275
ignoreFiles: "test",
7376
ignoreCssSelectors: [],
7477
searchBarPosition: "auto",
78+
removeDefaultStopWordFilter: ["en", "zh"],
7579
},
7680
{
7781
siteDir,
@@ -102,6 +106,7 @@ describe("processPluginOptions", () => {
102106
ignoreFiles: ["test"],
103107
ignoreCssSelectors: [],
104108
searchBarPosition: "left",
109+
removeDefaultStopWordFilter: ["en", "zh"],
105110
});
106111
});
107112
});

docusaurus-search-local/src/server/utils/processPluginOptions.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ export function processPluginOptions(
3535
config.searchBarPosition =
3636
search && search.position === "left" ? "left" : "right";
3737
}
38+
if (!Array.isArray(config.removeDefaultStopWordFilter)) {
39+
config.removeDefaultStopWordFilter = config.removeDefaultStopWordFilter
40+
? ["en"]
41+
: [];
42+
}
3843
return config;
3944
}
4045

0 commit comments

Comments
 (0)