Skip to content

Commit 06f2c64

Browse files
authored
Merge pull request #9405 from keymanapp/feat/core/9119-marker-core-epic-ldml
2 parents c6c2581 + 2bf22c4 commit 06f2c64

File tree

10 files changed

+343
-81
lines changed

10 files changed

+343
-81
lines changed

core/src/ldml/ldml_processor.cpp

Lines changed: 135 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ ldml_processor::process_event(
193193
state->actions().clear();
194194

195195
switch (vk) {
196+
// Special handling for backspace VK
196197
case KM_KBP_VKEY_BKSP:
197198
{
198199
if (!!bksp_transforms) {
@@ -212,6 +213,7 @@ ldml_processor::process_event(
212213
if(end != state->context().rend()) {
213214
if((*end).type == KM_KBP_CT_CHAR) {
214215
last_char = (*end).character;
216+
// TODO-LDML: markers!
215217
}
216218
}
217219
if (last_char == 0UL) {
@@ -231,78 +233,93 @@ ldml_processor::process_event(
231233
}
232234
break;
233235
default:
236+
// all other VKs
234237
{
235-
// adapted from kmx_processor.cpp
236-
237-
/** a copy of the current/changed context, for transform use */
238-
ldml::string_list ctxt;
239-
240-
// Construct a context buffer of all the KM_KBP_BT_CHAR items
241-
// Extract the context into 'ctxt' for transforms to process
242-
if (!!transforms) {
243-
// if no transforms, no reason to do this extraction (ctxt will remain empty)
244-
auto &cp = state->context();
245-
// We're only interested in as much of the context as is a KM_KBP_BT_CHAR.
246-
uint8_t last_type = KM_KBP_BT_UNKNOWN;
247-
for (auto c = cp.rbegin(); c != cp.rend(); c++) {
248-
last_type = c->type;
249-
if (last_type != KM_KBP_BT_CHAR) {
250-
// not a char, stop here
251-
// TODO-LDML: markers?
252-
break;
253-
}
254-
ctxt.emplace_front(1, c->character);
255-
// extract UTF-32 to 1 or 2 UTF-16 chars in a string
256-
}
257-
}
258-
259238
// Look up the key
260239
const std::u16string str = keys.lookup(vk, modifier_state);
240+
261241
if (str.empty()) {
262-
// not found, so pass the keystroke on to the Engine
242+
// no key was found, so pass the keystroke on to the Engine
263243
state->actions().push_invalidate_context();
264244
state->actions().push_emit_keystroke();
265245
break; // ----- commit and exit
266246
}
267-
// found the correct string - push it into the context and actions
247+
248+
// found a string - push it into the context and actions
249+
// we convert it here instead of using the emit_text() overload
250+
// so that we don't have to reconvert it inside the transform code.
268251
const std::u32string str32 = kmx::u16string_to_u32string(str);
269-
for (const auto &ch : str32) {
270-
state->context().push_character(ch);
271-
state->actions().push_character(ch);
272-
}
273-
// Now process transforms
274-
// Process the transforms
275-
if (!!transforms) {
276-
// add the newly added char to ctxt
277-
ctxt.push_back(str32);
278252

253+
if (!transforms) {
254+
// No transforms: just emit the string.
255+
emit_text(state, str32);
256+
} else {
257+
// Process transforms here
258+
/**
259+
* a copy of the current/changed context, for transform use.
260+
*
261+
*/
262+
std::u32string ctxtstr;
263+
(void)context_to_string(state, ctxtstr);
264+
// add the newly added key output to ctxtstr
265+
ctxtstr.append(str32);
266+
267+
/** the output buffer for transforms */
279268
std::u32string outputString;
280269

281-
// TODO-LDML: unroll ctxt into a str. Would be better to have transforms be able to process a vector
282-
std::u32string ctxtstr;
283-
for (const auto &ch : ctxt) {
284-
ctxtstr.append(ch);
285-
}
286-
// check if the context matched, and if so how much (at the end)
270+
// apply the transform, get how much matched (at the end)
287271
const size_t matchedContext = transforms->apply(ctxtstr, outputString);
288272

289-
if (matchedContext > 0) {
290-
// Found something.
291-
// Now, clear out the old context
292-
for (size_t i = 0; i < matchedContext; i++) {
293-
state->context().pop_back(); // Pop off last
294-
auto deletedChar = ctxt[ctxt.size() - i - 1][0];
295-
state->actions().push_backspace(KM_KBP_BT_CHAR, deletedChar); // Cause prior char to be removed
273+
if (matchedContext == 0) {
274+
// No match, just emit the original string
275+
emit_text(state, str32);
276+
} else {
277+
// We have a match.
278+
279+
ctxtstr.resize(ctxtstr.length() - str32.length());
280+
/** how many chars of the context we need to clear */
281+
auto charsToDelete = matchedContext - str32.length(); /* we don't need to clear the output of the current key */
282+
283+
/** how many context items need to be removed */
284+
size_t contextRemoved = 0;
285+
for (auto c = state->context().rbegin(); charsToDelete > 0 && c != state->context().rend(); c++, contextRemoved++) {
286+
/** last char of context */
287+
km_kbp_usv lastCtx = ctxtstr.back();
288+
uint8_t type = c->type;
289+
assert(type == KM_KBP_BT_CHAR || type == KM_KBP_BT_MARKER);
290+
if (type == KM_KBP_BT_CHAR) {
291+
// single char, drop it
292+
charsToDelete--;
293+
assert(c->character == lastCtx);
294+
ctxtstr.pop_back();
295+
state->actions().push_backspace(KM_KBP_BT_CHAR, lastCtx); // Cause prior char to be removed
296+
} else if (type == KM_KBP_BT_MARKER) {
297+
// it's a marker, 'worth' 3 uchars
298+
assert(charsToDelete >= 3);
299+
assert(lastCtx == c->marker); // end of list
300+
charsToDelete -= 3;
301+
// pop off the three-part sentinel string
302+
ctxtstr.pop_back();
303+
ctxtstr.pop_back();
304+
ctxtstr.pop_back();
305+
// push a special backspace to delete the marker
306+
state->actions().push_backspace(KM_KBP_BT_MARKER, c->marker);
307+
}
296308
}
297-
// Now, add in the updated text
298-
for (const auto &ch : outputString) {
299-
state->context().push_character(ch);
300-
state->actions().push_character(ch);
309+
// now, pop the right number of context items
310+
for (size_t i = 0; i < contextRemoved; i++) {
311+
// we don't pop during the above loop because the iterator gets confused
312+
state->context().pop_back();
301313
}
302-
}
303-
}
304-
}
305-
}
314+
// Now, add in the updated text. This will convert UC_SENTINEL, etc back to marker actions.
315+
emit_text(state, outputString);
316+
// If we needed it further, we could update ctxtstr here:
317+
// ctxtstr.append(outputString);
318+
// ... but it is no longer needed at this point.
319+
} // end of transform match
320+
} // end of processing transforms
321+
} // end of processing a 'normal' vk
322+
} // end of switch
306323
// end of normal processing: commit and exit
307324
state->actions().commit();
308325
} catch (std::bad_alloc &) {
@@ -336,5 +353,67 @@ km_kbp_status ldml_processor::validate() const {
336353
return _valid ? KM_KBP_STATUS_OK : KM_KBP_STATUS_INVALID_KEYBOARD;
337354
}
338355

356+
void
357+
ldml_processor::emit_text(km_kbp_state *state, const std::u16string &str) {
358+
const std::u32string str32 = kmx::u16string_to_u32string(str);
359+
emit_text(state, str32);
360+
}
361+
362+
void
363+
ldml_processor::emit_text(km_kbp_state *state, const std::u32string &str) {
364+
for (auto it = str.begin(); it < str.end(); it++) {
365+
const auto ch = *it;
366+
// If we are at the start of a sequence:
367+
if (ch == LDML_UC_SENTINEL) {
368+
it++; // consume LDML_UC_SENTINEL
369+
// TODO-LDML: Might assert if a malformed sequence is included- "should not happen"?
370+
assert(it < str.end());
371+
// verify that the next char is LDML_MARKER_CODE
372+
assert(*it == LDML_MARKER_CODE);
373+
it++; // consume LDML_MARKER_CODE
374+
assert(it < str.end());
375+
const auto marker_no = *it;
376+
assert(marker_no >= LDML_MARKER_MIN_INDEX);
377+
assert(marker_no <= LDML_MARKER_ANY_INDEX);
378+
emit_marker(state, marker_no);
379+
} else {
380+
emit_text(state, ch);
381+
}
382+
}
383+
}
384+
385+
void
386+
ldml_processor::emit_text(km_kbp_state *state, km_kbp_usv ch) {
387+
assert(ch != LDML_UC_SENTINEL);
388+
state->context().push_character(ch);
389+
state->actions().push_character(ch);
390+
}
391+
392+
/**
 * Emit a marker to both the action queue and the context.
 *
 * @param state      keyboard processor state to update
 * @param marker_no  number of the marker to push
 */
void
ldml_processor::emit_marker(km_kbp_state *state, KMX_DWORD marker_no) {
  // action first, then the matching context item
  state->actions().push_marker(marker_no);
  state->context().push_marker(marker_no);
}
397+
398+
size_t
399+
ldml_processor::context_to_string(km_kbp_state *state, std::u32string &str) {
400+
str.clear();
401+
auto &cp = state->context();
402+
size_t ctxlen = 0; // TODO-LDML: is this needed?
403+
uint8_t last_type = KM_KBP_BT_UNKNOWN;
404+
for (auto c = cp.rbegin(); c != cp.rend(); c++, ctxlen++) {
405+
last_type = c->type;
406+
if (last_type == KM_KBP_BT_CHAR) {
407+
str.insert(0, 1, c->character);
408+
} else if (last_type == KM_KBP_BT_MARKER) {
409+
prepend_marker(str, c->marker);
410+
} else {
411+
break;
412+
}
413+
}
414+
return ctxlen; // consumed the entire context buffer.
415+
}
416+
417+
339418
} // namespace kbp
340419
} // namespace km

core/src/ldml/ldml_processor.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,34 @@ namespace kbp {
8383
km_kbp_keyboard_key * get_key_list() const override;
8484

8585
km_kbp_keyboard_imx * get_imx_list() const override;
86+
87+
private:
88+
/** emit text to context and actions */
89+
static void emit_text(km_kbp_state *state, const std::u16string &str);
90+
/** emit text to context and actions */
91+
static void emit_text(km_kbp_state *state, const std::u32string &str);
92+
/** emit char to context and actions */
93+
static void emit_text(km_kbp_state *state, km_kbp_usv ch);
94+
/** emit a marker */
95+
static void emit_marker(km_kbp_state *state, KMX_DWORD marker);
96+
97+
/**
98+
* add the string+marker portion of the context to the beginning of str.
99+
* Stop when a non-string and non-marker is hit.
100+
* Convert markers into the UC_SENTINEL format.
101+
* @return the number of context items consumed
102+
*/
103+
static size_t context_to_string(km_kbp_state *state, std::u32string &str);
104+
105+
/** prepend the marker string in UC_SENTINEL format to the str */
106+
inline static void prepend_marker(std::u32string &str, KMX_DWORD marker);
86107
};
108+
109+
void
110+
ldml_processor::prepend_marker(std::u32string &str, KMX_DWORD marker) {
111+
km_kbp_usv triple[] = {LDML_UC_SENTINEL, LDML_MARKER_CODE, marker};
112+
str.insert(0, triple, 3);
113+
}
114+
87115
} // namespace kbp
88116
} // namespace km
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE keyboardTest SYSTEM "../../../../../resources/standards-data/ldml-keyboards/techpreview/dtd/ldmlKeyboardTest.dtd">
3+
<keyboardTest conformsTo="techpreview">
4+
<info keyboard="k_210_marker.xml" author="Team Keyboard" name="marker" />
5+
<tests name="marker-tests">
6+
<test name="marker-test-basic-grave">
7+
<startContext to="" />
8+
<keystroke key="e" />
9+
<keystroke key="grave" />
10+
<check result="è" />
11+
</test>
12+
<test name="marker-test-basic-acute">
13+
<startContext to="" />
14+
<keystroke key="e" />
15+
<keystroke key="acute" />
16+
<check result="é" />
17+
</test>
18+
<test name="marker-test-double-acute">
19+
<startContext to="" />
20+
<keystroke key="acute" />
21+
<keystroke key="acute" />
22+
<check result="+" />
23+
</test>
24+
<test name="marker-test-trailing-acute">
25+
<startContext to="" />
26+
<keystroke key="acute" />
27+
<!-- TODO-LDML: broken, because without a rule to 'cleanup' trailing acute, we don't currently have code to fix it -->
28+
<check result="" />
29+
</test>
30+
<test name="marker-test-trailing-grave">
31+
<startContext to="" />
32+
<keystroke key="grave" />
33+
<check result="_" />
34+
</test>
35+
<test name="marker-test-append-grave">
36+
<startContext to="e" />
37+
<keystroke key="grave" />
38+
<check result="è" />
39+
</test>
40+
<test name="marker-test-append-acute">
41+
<startContext to="e" />
42+
<keystroke key="acute" />
43+
<check result="é" />
44+
</test>
45+
</tests>
46+
</keyboardTest>
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<!--
4+
Test Keyboard
5+
-->
6+
7+
<!DOCTYPE keyboard SYSTEM "../../../../../resources/standards-data/ldml-keyboards/techpreview/dtd/ldmlKeyboard.dtd">
8+
<keyboard locale="en" conformsTo="techpreview">
9+
<info author="srl295" indicator="🙀" layout="qwerty" normalization="NFC" />
10+
<names>
11+
<name value="Marker Test" />
12+
</names>
13+
14+
<displays>
15+
<display to="\m{grave}" display="`" />
16+
<display to="\m{acute}" display="´" />
17+
</displays>
18+
19+
<keys>
20+
<key id="grave" to="\m{grave}" />
21+
<key id="acute" to="\m{acute}" />
22+
<key id="caret" to="C" /> <!-- see transform -->
23+
<key id="hacek" to="H" /> <!-- see transform -->
24+
</keys>
25+
26+
<layers form="us">
27+
<layer modifier="none" id="base">
28+
<row keys="grave acute caret hacek" />
29+
<row keys="q w e" /> <!-- etc -->
30+
<row keys="a s d" /> <!-- etc -->
31+
<row keys="z x c" /> <!-- etc -->
32+
</layer>
33+
</layers>
34+
35+
<transforms type="simple">
36+
<transformGroup>
37+
<transform from="C" to="\m{caret}" />
38+
<transform from="H" to="\m{hacek}" />
39+
</transformGroup>
40+
<transformGroup>
41+
<transform from="\m{acute}\m{acute}" to="+" />
42+
<transform from="e\m{acute}" to="é" />
43+
<transform from="e\m{grave}" to="è" />
44+
<transform from="e\m{caret}" to="ê" />
45+
<transform from="e\m{hacek}" to="e\u{030C}" />
46+
<transform from="c\m{hacek}" to="č" />
47+
</transformGroup>
48+
<transformGroup>
49+
<transform from="\m{grave}" to="_" /> <!-- trailing grave becomes _ -->
50+
<!-- no cleanup for trailing acute -->
51+
</transformGroup>
52+
</transforms>
53+
</keyboard>

core/tests/unit/ldml/keyboards/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ tests_with_testdata = [
3434
'k_001_tiny',
3535
'k_020_fr', # TODO-LDML: move to cldr above (fix vkey)
3636
'k_200_reorder_nod_Lana',
37+
'k_210_marker',
3738
]
3839

3940
tests = tests_without_testdata

developer/src/kmc-ldml/src/compiler/keys.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ export class KeysCompiler extends SectionCompiler {
165165
key.longPress
166166
);
167167
const longPressDefault = sections.strs.allocAndUnescapeString(
168-
// TODO-LDML: markers,variables
169-
key.longPressDefault
168+
// TODO-LDML: variables
169+
sections.vars.substituteMarkerString(key.longPressDefault),
170170
);
171171
const multiTap: ListItem = sections.list.allocListFromEscapedSpaces(
172172
sections.strs,

0 commit comments

Comments
 (0)