Skip to content

Commit 19849e8

Browse files
Flatten scope type list (#2628)
Brings our DFA compilation time from around 700ms down to about 320ms. Fixes #2614. I had forgotten, but we actually do parse `<user.any_alphanumeric_key>` so we can show the user's spoken form in the tutorial. I've now utilized this further and actually add the spoken forms for the glyph scope type in the flattened list. Note that this implementation is somewhat hacky on purpose. Basically I didn't want to touch our csv parser without first talking to pokey, since a lot of these lists and spoken forms are used in places like the cheat sheet and the tutorial. What I'm instead doing is keeping all the existing lists and then creating a new list that is a flattened version of them. That way the cheat sheet and other places can still use the individual lists, and we are only using this flattened larger list for the actual scope type capture. This is probably something we want to revisit later, but for now we're getting a huge boost in DFA compilation time with no changes to the speakable grammar, and that I think is a clear win. ## Checklist - [/] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [/] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [x] I have not broken the cheatsheet - [x] Run Talon grammar tests --------- Co-authored-by: Phil Cohen <[email protected]>
1 parent 0e01381 commit 19849e8

File tree

11 files changed

+149
-236
lines changed

11 files changed

+149
-236
lines changed

cursorless-talon/src/csv_overrides.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@ class SpokenFormEntry:
4949
spoken_forms: list[str]
5050

5151

52+
def csv_get_ctx():
    """Return this module's `ctx` object."""
    return ctx
54+
55+
56+
def csv_get_normalized_ctx():
    """Return this module's `normalized_ctx` object."""
    return normalized_ctx
58+
59+
5260
def init_csv_and_watch_changes(
5361
filename: str,
5462
default_values: ListToSpokenForms,

cursorless-talon/src/get_grapheme_spoken_form_entries.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
grapheme_capture_name = "user.any_alphanumeric_key"
1212

1313

14-
def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
14+
def get_grapheme_spoken_form_entries(
15+
grapheme_talon_list: dict[str, str],
16+
) -> list[SpokenFormOutputEntry]:
1517
if grapheme_capture_name not in registry.captures:
1618
# We require this capture, and expect it to be defined. We want to show a user friendly error if it isn't present (usually indicating a problem with their community.git setup) and we think the user is going to use Cursorless.
1719
# However, sometimes users use different dictation engines (Vosk, Webspeech) with entirely different/smaller grammars that don't have the capture, and this code will run then, and falsely error. We don't want to show an error in that case because they don't plan to actually use Cursorless.
@@ -28,11 +30,20 @@ def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
2830
"id": id,
2931
"spokenForms": spoken_forms,
3032
}
31-
for symbol_list in generate_lists_from_capture(grapheme_capture_name)
32-
for id, spoken_forms in get_id_to_spoken_form_map(symbol_list).items()
33+
for id, spoken_forms in talon_list_to_spoken_form_map(
34+
grapheme_talon_list
35+
).items()
3336
]
3437

3538

39+
def get_graphemes_talon_list() -> dict[str, str]:
    """
    Merge every Talon list referenced by the grapheme capture into one dict
    mapping spoken form to id.

    Lists are merged in the order they are yielded by
    `generate_lists_from_capture`; if the same spoken form appears in more
    than one list, the entry from the later list wins.
    """
    merged: dict[str, str] = {}
    for list_name in generate_lists_from_capture(grapheme_capture_name):
        merged.update(get_id_to_talon_list(list_name))
    return merged
45+
46+
3647
def generate_lists_from_capture(capture_name) -> Iterator[str]:
3748
"""
3849
Given the name of a capture, yield the names of each list that the capture
@@ -68,20 +79,27 @@ def generate_lists_from_capture(capture_name) -> Iterator[str]:
6879
)
6980

7081

71-
def get_id_to_spoken_form_map(list_name: str) -> Mapping[str, list[str]]:
82+
def get_id_to_talon_list(list_name: str) -> dict[str, str]:
7283
"""
73-
Given the name of a Talon list, return a mapping from the values in that
74-
list to the list of spoken forms that map to the given value.
84+
Given the name of a Talon list, return that list
7585
"""
7686
try:
7787
# NB: [-1] because the last list is the active one
78-
raw_list = typing.cast(dict[str, str], registry.lists[list_name][-1]).copy()
88+
return typing.cast(dict[str, str], registry.lists[list_name][-1]).copy()
7989
except Error:
8090
app.notify(f"Error getting list {list_name}")
8191
return {}
8292

93+
94+
def talon_list_to_spoken_form_map(
95+
talon_list: dict[str, str],
96+
) -> Mapping[str, list[str]]:
97+
"""
98+
Given a Talon list, return a mapping from the values in that
99+
list to the list of spoken forms that map to the given value.
100+
"""
83101
inverted_list: defaultdict[str, list[str]] = defaultdict(list)
84-
for key, value in raw_list.items():
102+
for key, value in talon_list.items():
85103
inverted_list[value].append(key)
86104

87105
return inverted_list

cursorless-talon/src/modifiers/glyph_scope.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

cursorless-talon/src/modifiers/modifiers.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def cursorless_simple_modifier(m) -> dict[str, str]:
2727
"<user.cursorless_simple_scope_modifier>", # funk, state, class, every funk
2828
"<user.cursorless_ordinal_scope>", # first past second word
2929
"<user.cursorless_relative_scope>", # next funk, 3 funks
30-
"<user.cursorless_surrounding_pair_force_direction>", # DEPRECATED "left quad" / "right quad"
3130
]
3231

3332
modifiers = [

cursorless-talon/src/modifiers/scopes.py

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,25 @@
44

55
mod.list("cursorless_scope_type", desc="Supported scope types")
66
mod.list("cursorless_scope_type_plural", desc="Supported plural scope types")
7+
8+
mod.list(
9+
"cursorless_glyph_scope_type",
10+
desc="Cursorless glyph scope type",
11+
)
12+
mod.list(
13+
"cursorless_glyph_scope_type_plural",
14+
desc="Plural version of Cursorless glyph scope type",
15+
)
16+
17+
mod.list(
18+
"cursorless_surrounding_pair_scope_type",
19+
desc="Scope types that can function as surrounding pairs",
20+
)
21+
mod.list(
22+
"cursorless_surrounding_pair_scope_type_plural",
23+
desc="Plural form of scope types that can function as surrounding pairs",
24+
)
25+
726
mod.list(
827
"cursorless_custom_regex_scope_type",
928
desc="Supported custom regular expression scope types",
@@ -13,60 +32,49 @@
1332
desc="Supported plural custom regular expression scope types",
1433
)
1534

16-
17-
@mod.capture(
18-
rule="{user.cursorless_scope_type}"
19-
" | <user.cursorless_surrounding_pair_scope_type>"
20-
" | <user.cursorless_glyph_scope_type>"
21-
" | {user.cursorless_custom_regex_scope_type}"
35+
mod.list(
36+
"cursorless_scope_type_flattened",
37+
desc="All supported scope types flattened",
38+
)
39+
mod.list(
40+
"cursorless_scope_type_flattened_plural",
41+
desc="All supported plural scope types flattened",
2242
)
23-
def cursorless_scope_type(m) -> dict[str, str]:
24-
"""Cursorless scope type singular"""
25-
try:
26-
return {"type": m.cursorless_scope_type}
27-
except AttributeError:
28-
pass
29-
30-
try:
31-
return m.cursorless_surrounding_pair_scope_type
32-
except AttributeError:
33-
pass
3443

35-
try:
36-
return m.cursorless_glyph_scope_type
37-
except AttributeError:
38-
pass
3944

40-
return {
41-
"type": "customRegex",
42-
"regex": m.cursorless_custom_regex_scope_type,
43-
}
45+
@mod.capture(rule="{user.cursorless_scope_type_flattened}")
46+
def cursorless_scope_type(m) -> dict[str, str]:
47+
"""Cursorless scope type singular"""
48+
return creates_scope_type(m.cursorless_scope_type_flattened)
4449

4550

46-
@mod.capture(
47-
rule="{user.cursorless_scope_type_plural}"
48-
" | <user.cursorless_surrounding_pair_scope_type_plural>"
49-
" | <user.cursorless_glyph_scope_type_plural>"
50-
" | {user.cursorless_custom_regex_scope_type_plural}"
51-
)
51+
@mod.capture(rule="{user.cursorless_scope_type_flattened_plural}")
5252
def cursorless_scope_type_plural(m) -> dict[str, str]:
5353
"""Cursorless scope type plural"""
54-
try:
55-
return {"type": m.cursorless_scope_type_plural}
56-
except AttributeError:
57-
pass
58-
59-
try:
60-
return m.cursorless_surrounding_pair_scope_type_plural
61-
except AttributeError:
62-
pass
54+
return creates_scope_type(m.cursorless_scope_type_flattened_plural)
6355

64-
try:
65-
return m.cursorless_glyph_scope_type_plural
66-
except AttributeError:
67-
pass
6856

69-
return {
70-
"type": "customRegex",
71-
"regex": m.cursorless_custom_regex_scope_type_plural,
72-
}
57+
def creates_scope_type(id: str) -> dict[str, str]:
58+
grouping, value = id.split(".", 1)
59+
match grouping:
60+
case "simple":
61+
return {
62+
"type": value,
63+
}
64+
case "surroundingPair":
65+
return {
66+
"type": "surroundingPair",
67+
"delimiter": value,
68+
}
69+
case "customRegex":
70+
return {
71+
"type": "customRegex",
72+
"regex": value,
73+
}
74+
case "glyph":
75+
return {
76+
"type": "glyph",
77+
"character": value,
78+
}
79+
case _:
80+
raise ValueError(f"Unsupported scope type grouping: {grouping}")

cursorless-talon/src/modifiers/surrounding_pair.py

Lines changed: 0 additions & 80 deletions
This file was deleted.

cursorless-talon/src/paired_delimiter.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -54,29 +54,3 @@ def cursorless_wrapper_paired_delimiter(m) -> list[str]:
5454
except AttributeError:
5555
id = m.cursorless_wrapper_selectable_paired_delimiter
5656
return paired_delimiters[id]
57-
58-
59-
@mod.capture(
60-
rule=(
61-
"{user.cursorless_selectable_only_paired_delimiter} |"
62-
"{user.cursorless_wrapper_selectable_paired_delimiter}"
63-
)
64-
)
65-
def cursorless_selectable_paired_delimiter(m) -> str:
66-
try:
67-
return m.cursorless_selectable_only_paired_delimiter
68-
except AttributeError:
69-
return m.cursorless_wrapper_selectable_paired_delimiter
70-
71-
72-
@mod.capture(
73-
rule=(
74-
"{user.cursorless_selectable_only_paired_delimiter_plural} |"
75-
"{user.cursorless_wrapper_selectable_paired_delimiter_plural}"
76-
)
77-
)
78-
def cursorless_selectable_paired_delimiter_plural(m) -> str:
79-
try:
80-
return m.cursorless_selectable_only_paired_delimiter_plural
81-
except AttributeError:
82-
return m.cursorless_wrapper_selectable_paired_delimiter_plural

cursorless-talon/src/spoken_forms.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
)
1414
from .get_grapheme_spoken_form_entries import (
1515
get_grapheme_spoken_form_entries,
16+
get_graphemes_talon_list,
1617
grapheme_capture_name,
1718
)
1819
from .marks.decorated_mark import init_hats
1920
from .spoken_forms_output import SpokenFormsOutput
21+
from .spoken_scope_forms import init_scope_spoken_forms
2022

2123
JSON_FILE = Path(__file__).parent / "spoken_forms.json"
2224
disposables: list[Callable] = []
@@ -99,6 +101,7 @@ def update():
99101
custom_spoken_forms: dict[str, list[SpokenFormEntry]] = {}
100102
spoken_forms_output = SpokenFormsOutput()
101103
spoken_forms_output.init()
104+
graphemes_talon_list = get_graphemes_talon_list()
102105

103106
def update_spoken_forms_output():
104107
spoken_forms_output.write(
@@ -113,7 +116,7 @@ def update_spoken_forms_output():
113116
for entry in spoken_form_list
114117
if entry.list_name in LIST_TO_TYPE_MAP
115118
],
116-
*get_grapheme_spoken_form_entries(),
119+
*get_grapheme_spoken_form_entries(graphemes_talon_list),
117120
]
118121
)
119122

@@ -193,6 +196,7 @@ def handle_new_values(csv_name: str, values: list[SpokenFormEntry]):
193196
),
194197
]
195198

199+
init_scope_spoken_forms(graphemes_talon_list)
196200
update_spoken_forms_output()
197201
initialized = True
198202

0 commit comments

Comments
 (0)