Skip to content

Commit 3b4ff68

Browse files
committed
feat: guarantee a well-defined order for all flexSearch queries
A flexSearch query without orderBy will now use the primarySort (flexSearchOrder) as order. If no flexSearchOrder is configured, this defaults to createdAt_DESC, id_DESC. If flexSearchOrder is configured, createdAt_DESC, id_DESC will always be appended to make order deterministic. A flexSearch query with an explicit orderBy that contradicts with the primarySort (so primarySort cannot be used) will also use an absolute order now. The performance effect should be relatively minor because using primary sort is very efficient.
1 parent c10d36e commit 3b4ff68

File tree

5 files changed

+69
-21
lines changed

5 files changed

+69
-21
lines changed

src/model/implementation/root-entity-type.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ export class RootEntityType extends ObjectTypeBase {
179179
@memorize()
180180
get discriminatorField(): Field {
181181
// later, we can return @key here when it exists and is required
182+
// however, consider: this is used everywhere in primarySort, so changing it would cause
183+
// migrations to re-create all flexsearch views
182184
return this.getFieldOrThrow(ID_FIELD);
183185
}
184186

src/schema-generation/flex-search-generator.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,10 @@ export class FlexSearchGenerator {
222222
...schemaField,
223223
transform: (sourceNode, args, context) => {
224224
const assertionVariable = new VariableQueryNode();
225-
// If a filter or an order_by is specified, a pre-execution query node is added that throws a TOO_MANY_OBJECTS_ERROR if the amount of objects the filter or order_by is
226-
// used on is to large
225+
// If a filter or an order_by is specified, a pre-execution query node is added
226+
// that throws a TOO_MANY_OBJECTS_ERROR if the amount of objects returned by the
227+
// flexSearchFilter is too large. This prevents performance issues with
228+
// in-memory filtering and sorting.
227229
const filterArg = args[POST_FILTER_ARG] || args[FILTER_ARG];
228230
if (
229231
(filterArg && Object.keys(filterArg).length > 0) ||

src/schema-generation/order-by-and-pagination-augmentation.ts

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ import { QueryNodeField } from './query-node-object-type';
4545
import { RootFieldHelper } from './root-field-helper';
4646
import { and, binaryOp, binaryOpWithAnaylzer } from './utils/input-types';
4747
import { getOrderByValues } from './utils/pagination';
48+
import {
49+
getSortClausesForPrimarySort,
50+
orderArgMatchesPrimarySort,
51+
} from './utils/flex-search-utils';
4852

4953
export enum LimitTypeCheckType {
5054
RESULT_VALIDATOR = 'RESULT_VALIDATOR',
@@ -201,17 +205,40 @@ export class OrderByAndPaginationAugmentation {
201205
// ok. If the sorting matches the primary sort, sorting is efficient, but you still need to specify
202206
// it, otherwise, sometimes the order can be wrong (after inserting or updating documents).
203207

204-
// this may generate a SORT clause that is not covered by the flexsearch index,
205-
// but the TooManyObject check of flex-search-generator already handles this case to throw
206-
// a TooManyObjects error if needed.
207-
orderByValues = getOrderByValues(args, orderByType, {
208-
isAbsoluteOrderRequired,
209-
});
208+
// flexsearch is mostly used in tables, where a consistent absolute sort order is important
209+
// At the same time, primary sort is relatively cheap to use (in a test with primed caches on
210+
// 10 million objects, slowdown was 9% (after warmup), and the whole operation was still sub-millisecond)
211+
// for this reason, we guarantee an absolute, well-defined sort ordering at all times in flexsearch
212+
// - if possible (does not contradict orderBy), we use the primarySort, which is very fast
213+
// - if not possible, sorting will happen in-memory anyway, so it does not hurt to make it absolute
214+
if (
215+
orderArgMatchesPrimarySort(
216+
args[ORDER_BY_ARG],
217+
listNode.rootEntityType.flexSearchPrimarySort,
218+
)
219+
) {
220+
// note that the primary sort always has an absolute order
221+
// (there is special logic in RootEntityType to guarantee this)
222+
orderByValues = getSortClausesForPrimarySort(
223+
listNode.rootEntityType,
224+
orderByType,
225+
);
226+
} else {
227+
// this will generate a SORT clause that is not covered by the primary sort,
228+
// so sorting will happen purely in memory.
229+
// FlexSearchGenerator.augmentWithCondition will add a check to throw a
230+
// FLEX_SEARCH_TOO_MANY_OBJECTS error if there are too many objects to sort in memory
231+
// (it has the same orderArgMatchesPrimarySort-based logic)
232+
orderByValues = getOrderByValues(args, orderByType, {
233+
// Since we're already sorting in-memory, it does not hurt to make this ordering absolute
234+
isAbsoluteOrderRequired: true,
235+
});
236+
}
210237

211-
// for now, cursor-based pagination is only allowed when we can use flexsearch filters for the
212-
// paginationFilter. This simplifies the implementation, but maybe we should support it in the
213-
// future for consistency. Then however we need to adjust the too-many-objects check
214-
// do this check also if cursor is just requested so we get this error on the first page
238+
// Cursor-based pagination is only allowed when we can use flexsearch filters for the
239+
// paginationFilter. Otherwise, we would need a postFilter for the "after" arg,
240+
// and that could lead to bad performance / TOO_MANY_OBJECTS error. Better be
241+
// consistent and outright disallow cursor-based pagination in this case
215242
if (isCursorRequested || afterArg) {
216243
const violatingClauses = orderByValues.filter((val) =>
217244
val.path.some((field) => !field.isFlexSearchIndexed),

src/schema-generation/output-type-generator.ts

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ import {
3636
QueryNodeOutputType,
3737
} from './query-node-object-type';
3838
import { RootFieldHelper } from './root-field-helper';
39-
import { orderArgMatchesPrimarySort } from './utils/flex-search-utils';
39+
import {
40+
getSortClausesForPrimarySort,
41+
orderArgMatchesPrimarySort,
42+
} from './utils/flex-search-utils';
4043
import { getOrderByValues } from './utils/pagination';
4144

4245
export class OutputTypeGenerator {
@@ -159,13 +162,7 @@ export class OutputTypeGenerator {
159162
objectType.flexSearchPrimarySort,
160163
)
161164
) {
162-
// this would be cleaner if the primary sort was actually parsed into a ModelComponent (see e.g. the Index and IndexField classes)
163-
orderByValues = objectType.flexSearchPrimarySort.map((clause) =>
164-
orderByType.getValueOrThrow(
165-
clause.field.path.replace('.', '_') +
166-
(clause.direction === OrderDirection.ASCENDING ? '_ASC' : '_DESC'),
167-
),
168-
);
165+
orderByValues = getSortClausesForPrimarySort(objectType, orderByType);
169166
} else {
170167
orderByValues = getOrderByValues(listFieldRequest.args, orderByType, {
171168
isAbsoluteOrderRequired: true,

src/schema-generation/utils/flex-search-utils.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import { FlexSearchPrimarySortClause } from '../../model/implementation/flex-search';
22
import { OrderDirection } from '../../model/implementation/order';
3+
import { RootEntityType } from '../../model';
4+
import { OrderByEnumType, OrderByEnumValue } from '../order-by-enum-generator';
35

46
export function orderArgMatchesPrimarySort(
57
clauses: ReadonlyArray<string> | undefined,
68
primarySort: ReadonlyArray<FlexSearchPrimarySortClause>,
79
): boolean {
8-
// TODO what about sort clauses that are added automatically because the user used cursor-based pagination?
10+
// if no orderBy is given, we will always default to the primarySort order (OrderByAndPaginationAugmentation)
911
if (!clauses || !clauses.length) {
1012
return true;
1113
}
@@ -14,6 +16,8 @@ export function orderArgMatchesPrimarySort(
1416
return false;
1517
}
1618

19+
// note that primary sort cannot be used backwards
20+
1721
for (let index = 0; index < clauses.length; index++) {
1822
const arg = clauses[index];
1923
if (arg.endsWith('_ASC')) {
@@ -37,3 +41,19 @@ export function orderArgMatchesPrimarySort(
3741

3842
return true;
3943
}
44+
45+
/**
46+
* Generates a list of orderBy enum values that exactly represent the primary search
47+
*/
48+
export function getSortClausesForPrimarySort(
49+
objectType: RootEntityType,
50+
orderByType: OrderByEnumType,
51+
): ReadonlyArray<OrderByEnumValue> {
52+
// this would be cleaner if the primary sort was actually parsed into a ModelComponent (see e.g. the Index and IndexField classes)
53+
return objectType.flexSearchPrimarySort.map((clause) =>
54+
orderByType.getValueOrThrow(
55+
clause.field.path.replace('.', '_') +
56+
(clause.direction === OrderDirection.ASCENDING ? '_ASC' : '_DESC'),
57+
),
58+
);
59+
}

0 commit comments

Comments
 (0)