Skip to content

Commit e656750

Browse files
committed
Adjust lucene query & logic
- Prefer fuzzy/distance searches; this allows misspellings.
- Narrow results on each input word, instead of widening.
- Filter out results whose score is too low.
1 parent 320603e commit e656750

File tree

3 files changed

+38
-6
lines changed

3 files changed

+38
-6
lines changed

src/components/project/project-filters.query.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export const projectFilters = filter.define(() => ProjectFilters, {
1212
relation('out', '', 'name', ACTIVE),
1313
node('match'),
1414
]),
15+
minScore: 0.8,
1516
}),
1617
type: filter.stringListBaseNodeProp(),
1718
status: filter.stringListProp(),

src/core/database/query/filters.ts

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
import { entries, Nil } from '@seedcompany/common';
1+
import { cleanSplit, entries, Nil } from '@seedcompany/common';
22
import {
33
comparisions,
4+
greaterThan,
45
inArray,
56
isNull,
67
node,
@@ -238,27 +239,55 @@ export const sub =
238239
export const fullText =
239240
({
240241
index,
242+
escapeLucene = true,
243+
toLucene,
244+
minScore = 0,
241245
matchToNode,
242246
}: {
243247
index: () => FullTextIndex;
248+
escapeLucene?: boolean;
249+
toLucene?: (input: string) => string;
250+
minScore?: number;
244251
matchToNode: (query: Query) => Query;
245252
}) =>
246253
<T, K extends ConditionalKeys<T, string | undefined>>({
247-
value: input,
254+
value,
248255
key: field,
249256
query,
250257
}: BuilderArgs<T, K>) => {
251-
if (!input || typeof input !== 'string') {
258+
if (!value || typeof value !== 'string') {
252259
return null;
253260
}
254-
const escaped = escapeLuceneSyntax(input);
255261

256-
const lucene = `*${escaped}*`;
262+
let input: string = value;
263+
264+
input = escapeLucene ? escapeLuceneSyntax(input) : input;
265+
266+
const lucene =
267+
toLucene?.(input) ??
268+
// Default to each word being matched.
269+
// And for each word...
270+
cleanSplit(input, ' ')
271+
.map((term) => {
272+
const adjusted = [
273+
// fuzzy (distance) search with boosted priority
274+
`${term}~^2`,
275+
// word prefixes in case the distance is too great
276+
`*${term}*`,
277+
].join(' OR ');
278+
return `(${adjusted})`;
279+
})
280+
.join(' AND ');
257281

258282
query
259283
.subQuery((q) =>
260284
q
261-
.call(index().search(lucene, { limit: 100 }).yield({ node: 'match' }))
285+
.call(
286+
index()
287+
.search(lucene, { limit: 100 })
288+
.yield({ node: 'match', score: true }),
289+
)
290+
.where({ score: greaterThan(minScore) })
262291
.apply(matchToNode)
263292
.return(collect('distinct node').as(`${field}Matches`)),
264293
)

src/core/database/query/full-text.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ export const FullTextIndex = (config: {
5959
*
6060
* NOTE: the `query` is Lucene syntax.
6161
* If this is coming from user input, consider using the {@link escapeLuceneSyntax} function.
62+
*
63+
* @see https://lucene.apache.org/core/2_9_4/queryparsersyntax.html
6264
*/
6365
search: (
6466
query: string,

0 commit comments

Comments
 (0)