Skip to content

Commit fdbbf0c

Browse files
committed
Refactor full text abstraction
1 parent c4bb0f8 commit fdbbf0c

File tree

5 files changed

+184
-36
lines changed

5 files changed

+184
-36
lines changed

src/components/product/product.repository.ts

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import {
2727
deactivateProperty,
2828
escapeLuceneSyntax,
2929
filter,
30-
fullTextQuery,
30+
FullTextIndex,
3131
matchProps,
3232
matchPropsAndProjectSensAndScopedRoles,
3333
merge,
@@ -552,7 +552,7 @@ export class ProductRepository extends CommonRepository {
552552
.query()
553553
.apply((q) =>
554554
query
555-
? q.apply(fullTextQuery('ProductCompletionDescription', query))
555+
? q.apply(ProductCompletionDescriptionIndex.search(query))
556556
: q.matchNode('node', 'ProductCompletionDescription'),
557557
)
558558
.apply((q) =>
@@ -569,18 +569,21 @@ export class ProductRepository extends CommonRepository {
569569

570570
@OnIndex('schema')
571571
private async createCompletionDescriptionIndex() {
572-
await this.db.createFullTextIndex(
573-
'ProductCompletionDescription',
574-
['ProductCompletionDescription'],
575-
['value'],
576-
{
577-
analyzer: 'standard-folding',
578-
},
579-
);
572+
await this.db
573+
.query()
574+
.apply(ProductCompletionDescriptionIndex.create())
575+
.run();
580576
}
581577

582578
@OnIndex()
583579
private createResourceIndexes() {
584580
return this.getConstraintsFor(Product);
585581
}
586582
}
583+
584+
const ProductCompletionDescriptionIndex = FullTextIndex({
585+
indexName: 'ProductCompletionDescription',
586+
labels: 'ProductCompletionDescription',
587+
properties: 'value',
588+
analyzer: 'standard-folding',
589+
});

src/components/search/search.repository.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { CommonRepository, OnIndex, OnIndexParams } from '~/core/database';
44
import {
55
ACTIVE,
66
escapeLuceneSyntax,
7-
fullTextQuery,
7+
FullTextIndex,
88
} from '~/core/database/query';
99
import { BaseNode } from '~/core/database/results';
1010
import { SearchInput } from './dto';
@@ -13,9 +13,7 @@ import { SearchInput } from './dto';
1313
export class SearchRepository extends CommonRepository {
1414
@OnIndex('schema')
1515
protected async applyIndexes({ db }: OnIndexParams) {
16-
await db.createFullTextIndex('propValue', ['Property'], ['value'], {
17-
analyzer: 'standard-folding',
18-
});
16+
await db.query().apply(GlobalIndex.create()).run();
1917
}
2018

2119
/**
@@ -36,8 +34,7 @@ export class SearchRepository extends CommonRepository {
3634

3735
.union()
3836

39-
.raw('', { query: lucene })
40-
.apply(fullTextQuery('propValue', '$query', ['node as property']))
37+
.apply(GlobalIndex.search(lucene, { yield: 'node as property' }))
4138
.match([node('node'), relation('out', 'r', ACTIVE), node('property')])
4239
.return(['node', 'collect(type(r)) as matchedProps'])
4340
// The input.count is going to be applied once the results are 'filtered'
@@ -62,3 +59,10 @@ export class SearchRepository extends CommonRepository {
6259
return await query.run();
6360
}
6461
}
62+
63+
const GlobalIndex = FullTextIndex({
64+
indexName: 'propValue',
65+
labels: 'Property',
66+
properties: 'value',
67+
analyzer: 'standard-folding',
68+
});

src/core/database/query/cypher-functions.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { exp, ExpressionInput } from './cypher-expression';
2+
import { IndexFullTextQueryNodes } from './full-text';
23

34
/** Create a function with a name that takes a variable number of arguments */
45
const fn =
@@ -111,3 +112,11 @@ export const any = (
111112
list: ExpressionInput,
112113
predicate: ExpressionInput,
113114
) => fn('any')(`${variable} IN ${exp(list)} WHERE ${exp(predicate)}`);
115+
116+
export const db = {
117+
index: {
118+
fulltext: {
119+
queryNodes: IndexFullTextQueryNodes,
120+
},
121+
},
122+
};
Lines changed: 151 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,160 @@
1+
import { entries, isNotNil, many, Many, mapKeys } from '@seedcompany/common';
12
import { Query } from 'cypher-query-builder';
3+
import { pickBy } from 'lodash';
4+
import { LiteralUnion } from 'type-fest';
5+
import { CypherExpression, exp, isExp } from './cypher-expression';
6+
import { db } from './cypher-functions';
27

38
/**
4-
* Query a full text index for results.
5-
*
6-
* NOTE: the `query` is Lucene syntax. If this is coming from user input, consider
7-
* using the {@link escapeLuceneSyntax} function.
9+
* @see https://neo4j.com/docs/cypher-manual/current/indexes/semantic-indexes/full-text-indexes/
810
*/
9-
export const fullTextQuery =
10-
(index: string, query: string, yieldTerms = ['node']) =>
11-
(q: Query) =>
12-
q.raw(
13-
'CALL db.index.fulltext.queryNodes($index, $query)' +
14-
(yieldTerms && yieldTerms.length > 0
15-
? ' YIELD ' + yieldTerms.join(', ')
16-
: ''),
17-
{
18-
index,
19-
// fallback to "" when no query is given, so that no results are
20-
// returned instead of the procedure failing
21-
query: query.trim() || '""',
22-
},
23-
);
11+
export const FullTextIndex = (config: {
12+
indexName: string;
13+
labels: Many<string>;
14+
properties: Many<string>;
15+
analyzer?: Analyzer;
16+
/**
17+
* This means that updates will be applied in a background thread "as soon as possible",
18+
* instead of during a transaction commit, which is true for other indexes.
19+
*/
20+
eventuallyConsistent?: boolean;
21+
}) => {
22+
const quote = (q: string) => `'${q}'`;
23+
24+
const { indexName } = config;
25+
26+
return {
27+
/**
28+
* Query to create the full text index (if needed).
29+
*/
30+
create: () => {
31+
const parsedConfig = {
32+
analyzer: config.analyzer ? quote(config.analyzer) : undefined,
33+
// eslint-disable-next-line @typescript-eslint/naming-convention
34+
eventually_consistent: config.eventuallyConsistent
35+
? exp(config.eventuallyConsistent)
36+
: undefined,
37+
};
38+
const options =
39+
entries(pickBy(parsedConfig, (v) => v !== undefined)).length > 0
40+
? {
41+
indexConfig: mapKeys(parsedConfig, (k) => `fulltext.${k}`)
42+
.asRecord,
43+
}
44+
: undefined;
45+
const query = `
46+
CREATE FULLTEXT INDEX ${indexName} IF NOT EXISTS
47+
FOR (n:${many(config.labels).join('|')})
48+
ON EACH ${exp(many(config.properties).map((p) => `n.${p}`))}
49+
${options ? `OPTIONS ${exp(options)}` : ''}
50+
`;
51+
return (q: Query) => q.raw(query);
52+
},
53+
54+
/**
55+
* Query the full text index for results.
56+
*
57+
* NOTE: the `query` is Lucene syntax.
58+
* If this is coming from user input, consider using the {@link escapeLuceneSyntax} function.
59+
*/
60+
search: (
61+
query: string,
62+
config: {
63+
yield?: Many<string>;
64+
skip?: number;
65+
limit?: number;
66+
analyzer?: Analyzer;
67+
} = {},
68+
) => {
69+
const { yield: yieldTerms = ['node'], ...options } = config;
70+
71+
// fallback to "" when no query is given, so that no results are
72+
// returned instead of the procedure failing
73+
query = query.trim() || '""';
74+
75+
return (q: Query) =>
76+
q
77+
.call(db.index.fulltext.queryNodes(indexName, query, options))
78+
.yield(yieldTerms);
79+
},
80+
};
81+
};
2482

2583
export const escapeLuceneSyntax = (query: string) =>
2684
query
2785
.replace(/[![\]~)(+\-:?*"^&|{}\\/]/g, (char) => `\\${char}`)
2886
.replace(/\b(OR|AND|NOT)\b/g, (char) => `"${char}"`);
87+
88+
export const IndexFullTextQueryNodes = (
89+
indexName: string,
90+
query: string,
91+
options?:
92+
| {
93+
skip?: number;
94+
limit?: number;
95+
analyzer?: string;
96+
}
97+
| CypherExpression,
98+
) => ({
99+
name: 'db.index.fulltext.queryNodes',
100+
args: {
101+
indexName,
102+
query,
103+
...(options &&
104+
(Object.values(options).filter(isNotNil).length > 0 || isExp(options))
105+
? { options }
106+
: undefined),
107+
},
108+
});
109+
110+
type Analyzer = LiteralUnion<KnownAnalyzer, string>;
111+
112+
/**
113+
* List from Neo4j with:
114+
* CALL db.index.fulltext.listAvailableAnalyzers()
115+
*/
116+
type KnownAnalyzer =
117+
// Analyzer that uses ASCIIFoldingFilter to remove accents (diacritics).
118+
// Otherwise, it behaves as a standard english analyzer.
119+
// Note: This analyzer may have unexpected behavior, such as tokenizing all non-ASCII numbers and symbols.
120+
| 'standard-folding'
121+
// A simple analyzer that tokenizes at non-letter boundaries.
122+
// No stemming or filtering.
123+
// Works okay for most European languages, but is terrible for languages where words aren't separated by spaces, such as many Asian languages.
124+
| 'simple'
125+
// Stop analyzer tokenizes at non-letter characters, and filters out English stop words.
126+
// This differs from the 'classic' and 'standard' analyzers in that it makes no effort to recognize special terms,
127+
// like likely product names, URLs or email addresses.
128+
| 'stop'
129+
// Keyword analyzer "tokenizes" the text as a single term.
130+
// Useful for zip-codes, ids, etc. Situations where complete and exact matches are desired.
131+
| 'keyword'
132+
// The standard analyzer.
133+
// Tokenizes on non-letter and filters out English stop words and punctuation.
134+
// Does no stemming, but takes care to keep likely product names, URLs and email addresses as single terms.
135+
| 'standard'
136+
// Breaks text into terms by characters that have the unicode WHITESPACE property.
137+
| 'unicode_whitespace'
138+
// Tokenizes into sequences of alphanumeric, numeric, URL, email, southeast asian terms,
139+
// and into terms of individual ideographic and hiragana characters.
140+
// English stop words are filtered out.
141+
| 'url'
142+
// English analyzer with stemming and stop word filtering.
143+
| 'english'
144+
// Tokenizes into sequences of alphanumeric, numeric, URL, email, southeast asian terms,
145+
// and into terms of individual ideographic and hiragana characters.
146+
// English stop words are filtered out.
147+
| 'url_or_email'
148+
// The default analyzer.
149+
// Similar to the 'standard' analyzer, but filters no stop words.
150+
// Tokenizes on non-letter boundaries and filters out punctuation.
151+
// Does no stemming, but takes care to keep likely product names, URLs and email addresses as single terms.
152+
| 'standard-no-stop-words'
153+
// Classic Lucene analyzer. Similar to 'standard', but with worse unicode support.
154+
| 'classic'
155+
// Tokenizes into sequences of alphanumeric, numeric, URL, email, southeast asian terms,
156+
// and into terms of individual ideographic and hiragana characters.
157+
// English stop words are filtered out.
158+
| 'email'
159+
// Breaks text into terms by characters that are considered "Java whitespace".
160+
| 'whitespace';

src/core/database/query/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ export * from './properties/update-relation-list';
1111
export * from './create-relationships';
1212
export * from './cypher-expression';
1313
export * from './cypher-functions';
14-
export * from './full-text';
14+
export { FullTextIndex, escapeLuceneSyntax } from './full-text';
1515
export * from './lists';
1616
export * from './sorting';
1717
export * from './matching';

0 commit comments

Comments
 (0)