Skip to content

Commit ae5304d

Browse files
josharianpre-commit-ci-lite[bot]pokey
authored
implement non-quadratic version of uniqWith (#1834)
When given a comparator, uniqWith is quadratic. Add uniqWithHash that uses an optional hash function to return to approximately linear behavior, assuming that the hash function is good. For targets and selections, Using the contentRange/selection as the hash value seems to be pretty good in practice. I've added "concise" printing versions of positions, ranges, and selections. These are handy as hash functions, but I've also made heavy use of them while investigating hat allocation. They are generally useful. "take every line" on a file containing 3000 lines used to take about 15s on my computer. Now it takes about 1s. That's still not great but it's fast enough that it doesn't appear that Cursorless has hung. I've half a mind to try upstreaming this, or at least sending a documentation fix, because a quadratic API is a big footgun. Fixes #1830 enough for now ## Checklist - [x] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [/] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [/] I have not broken the cheatsheet --------- Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Co-authored-by: Pokey Rule <[email protected]>
1 parent 52d3eca commit ae5304d

File tree

10 files changed

+206
-9
lines changed

10 files changed

+206
-9
lines changed

packages/common/src/types/Position.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,4 +138,16 @@ export class Position {
138138
public toEmptyRange(): Range {
139139
return new Range(this, this);
140140
}
141+
142+
/**
143+
* Return a concise string representation of the position.
144+
* @returns concise representation
145+
**/
146+
public concise(): string {
147+
return `${this.line}:${this.character}`;
148+
}
149+
150+
public toString(): string {
151+
return this.concise();
152+
}
141153
}

packages/common/src/types/Range.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,16 @@ export class Range {
146146
? new Selection(this.end, this.start)
147147
: new Selection(this.start, this.end);
148148
}
149+
150+
/**
151+
* Return a concise string representation of the range
152+
* @returns concise representation
153+
**/
154+
public concise(): string {
155+
return `${this.start.concise()}-${this.end.concise()}`;
156+
}
157+
158+
public toString(): string {
159+
return this.concise();
160+
}
149161
}

packages/common/src/types/Selection.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,16 @@ export class Selection extends Range {
7272
this.anchor.isEqual(other.anchor) && this.active.isEqual(other.active)
7373
);
7474
}
75+
76+
/**
77+
* Return a concise string representation of the selection
78+
* @returns concise representation
79+
**/
80+
public concise(): string {
81+
return `${this.anchor.concise()}->${this.active.concise()}`;
82+
}
83+
84+
public toString(): string {
85+
return this.concise();
86+
}
7587
}

packages/cursorless-engine/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
"@types/mocha": "^8.0.4",
2828
"@types/sbd": "^1.0.3",
2929
"@types/sinon": "^10.0.2",
30+
"fast-check": "3.12.0",
3031
"js-yaml": "^4.1.0",
3132
"mocha": "^10.2.0",
3233
"sinon": "^11.1.1"

packages/cursorless-engine/src/processTargets/TargetPipelineRunner.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import {
55
Range,
66
ScopeType,
77
} from "@cursorless/common";
8-
import { uniqWith, zip } from "lodash";
8+
import { zip } from "lodash";
99
import {
1010
PrimitiveTargetDescriptor,
1111
RangeTargetDescriptor,
@@ -18,6 +18,7 @@ import { MarkStage, ModifierStage } from "./PipelineStages.types";
1818
import ImplicitStage from "./marks/ImplicitStage";
1919
import { ContainingTokenIfUntypedEmptyStage } from "./modifiers/ConditionalModifierStages";
2020
import { PlainTarget } from "./targets";
21+
import { uniqWithHash } from "../util/uniqWithHash";
2122

2223
export class TargetPipelineRunner {
2324
constructor(
@@ -287,7 +288,11 @@ function calcIsReversed(anchor: Target, active: Target) {
287288
}
288289

289290
function uniqTargets(array: Target[]): Target[] {
290-
return uniqWith(array, (a, b) => a.isEqual(b));
291+
return uniqWithHash(
292+
array,
293+
(a, b) => a.isEqual(b),
294+
(a) => a.contentRange.concise(),
295+
);
291296
}
292297

293298
function ensureSingleEditor(anchorTarget: Target, activeTarget: Target) {

packages/cursorless-engine/src/util/setSelectionsAndFocusEditor.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { EditableTextEditor, Selection } from "@cursorless/common";
22

3-
import uniqDeep from "./uniqDeep";
3+
import { uniqWithHash } from "./uniqWithHash";
44

55
export async function setSelectionsAndFocusEditor(
66
editor: EditableTextEditor,
@@ -22,5 +22,9 @@ export function setSelectionsWithoutFocusingEditor(
2222
editor: EditableTextEditor,
2323
selections: Selection[],
2424
) {
25-
editor.selections = uniqDeep(selections);
25+
editor.selections = uniqWithHash(
26+
selections,
27+
(a, b) => a.isEqual(b),
28+
(s) => s.concise(),
29+
);
2630
}

packages/cursorless-engine/src/util/uniqDeep.ts

Lines changed: 0 additions & 5 deletions
This file was deleted.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import * as assert from "assert";
2+
import * as fc from "fast-check";
3+
import { uniqWith } from "lodash";
4+
import { uniqWithHash } from "./uniqWithHash";
5+
6+
// known good but slow (quadratic!)
7+
function knownGoodUniqWithHash<T>(
8+
array: T[],
9+
fn: (a: T, b: T) => boolean,
10+
_: (t: T) => string,
11+
): T[] {
12+
return uniqWith(array, fn);
13+
}
14+
15+
suite("uniqWithHash", () => {
16+
test("uniqWithHash", () => {
17+
// believe it or not, all these test cases are important
18+
const testCases: number[][] = [
19+
[],
20+
[0],
21+
[1],
22+
[0, 1],
23+
[1, 0],
24+
[0, 0],
25+
[1, 1],
26+
[0, 1, 0, 1],
27+
[1, 0, 1, 0],
28+
[0, 1, 1, 0],
29+
[1, 0, 0, 1],
30+
[0, 1, 2, 3],
31+
[0, 1, 2, 3, 0, 1, 2, 3],
32+
[0, 0, 1, 1, 2, 2, 3, 3],
33+
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3],
34+
[0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3],
35+
[0, 1, 2, 3, 4, 5, 0, 1, 2, 3],
36+
[0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 0, 1, 2, 3],
37+
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
38+
[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
39+
];
40+
41+
const hashFunctions = [
42+
(x: number) => x.toString(),
43+
(x: number) => (x % 2).toString(),
44+
(_: number) => "0",
45+
];
46+
47+
hashFunctions.forEach((hash) => {
48+
const check = (testCase: number[]) => {
49+
const actual = uniqWithHash(testCase, (a, b) => a === b, hash);
50+
const expected = knownGoodUniqWithHash(
51+
testCase,
52+
(a, b) => a === b,
53+
hash,
54+
);
55+
assert.deepStrictEqual(actual, expected);
56+
};
57+
58+
testCases.forEach(check);
59+
60+
// max length 50 because the known good implementation is quadratic
61+
const randomNumbers = fc.array(fc.integer(), { maxLength: 50 });
62+
fc.assert(fc.property(randomNumbers, check));
63+
});
64+
});
65+
});
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import { uniqWith } from "lodash";
2+
3+
/**
4+
* Like lodash.uniqWith, but uses a hash function to (mostly) avoid quadratic runtime.
5+
* @param array The array to uniq
6+
* @param isEqual The equality function
7+
* @param hash The hash function. It must be a valid hash function, insofar as it must return the same value for equal items. A hash function that returns a constant value is equivalent to lodash.uniqWith.
8+
* @returns The uniq array
9+
*/
10+
export function uniqWithHash<T>(
11+
array: T[],
12+
isEqual: (a: T, b: T) => boolean,
13+
hash: (t: T) => string,
14+
): T[] {
15+
// Handle the common, tiny cases without allocating anything extra.
16+
if (array.length < 2) {
17+
return [...array];
18+
}
19+
if (array.length === 2) {
20+
if (isEqual(array[0], array[1])) {
21+
return [array[0]];
22+
}
23+
return [...array];
24+
}
25+
// First, split up the array using the hash function.
26+
// This keeps the sets of items passed to uniqWith small,
27+
// so that the quadratic runtime of uniqWith less of a problem.
28+
29+
/* Keep track of which sets have multiple items, so that we can uniq them. */
30+
const needsUniq: string[] = [];
31+
const hashToItems: Map<string, T[]> = array.reduce((acc, item) => {
32+
const key = hash(item);
33+
const items = acc.get(key);
34+
if (items == null) {
35+
acc.set(key, [item]);
36+
return acc;
37+
}
38+
39+
acc.get(key)!.push(item);
40+
if (items.length === 2) {
41+
needsUniq.push(key);
42+
}
43+
return acc;
44+
}, new Map<string, T[]>());
45+
46+
// Another common case: Everything is unique.
47+
if (needsUniq.length === 0) {
48+
return [...array];
49+
}
50+
51+
// For hash collisions, uniq the items,
52+
// letting uniqWith provide correct semantics.
53+
needsUniq.forEach((key) => {
54+
hashToItems.set(key, uniqWith(hashToItems.get(key)!, isEqual));
55+
});
56+
57+
// To preserve order, step through the original items
58+
// one at a time, returning it as appropriate.
59+
// We need to do this because uniqWith preserves order,
60+
// and we are mimicking its semantics.
61+
// Note that items were added in order,
62+
// and uniqWith preserved that order.
63+
return array.flatMap((item) => {
64+
const key = hash(item);
65+
const items = hashToItems.get(key)!;
66+
if (items == null || items.length === 0) {
67+
// Removed by uniqWith.
68+
return [];
69+
}
70+
const first = items[0]!;
71+
if (!isEqual(first, item)) {
72+
// Removed by uniqWith.
73+
// Note that it is sufficient to check the first item,
74+
// because uniqWith preserves order.
75+
return [];
76+
}
77+
// Emit item.
78+
items.shift();
79+
return first;
80+
});
81+
}

pnpm-lock.yaml

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)