@@ -193,6 +193,7 @@ ldml_processor::process_event(
193193 state->actions ().clear ();
194194
195195 switch (vk) {
196+ // Special handling for backspace VK
196197 case KM_KBP_VKEY_BKSP:
197198 {
198199 if (!!bksp_transforms) {
@@ -212,6 +213,7 @@ ldml_processor::process_event(
212213 if (end != state->context ().rend ()) {
213214 if ((*end).type == KM_KBP_CT_CHAR) {
214215 last_char = (*end).character ;
216+ // TODO-LDML: markers!
215217 }
216218 }
217219 if (last_char == 0UL ) {
@@ -231,78 +233,93 @@ ldml_processor::process_event(
231233 }
232234 break ;
233235 default :
236+ // all other VKs
234237 {
235- // adapted from kmx_processor.cpp
236-
237- /* * a copy of the current/changed context, for transform use */
238- ldml::string_list ctxt;
239-
240- // Construct a context buffer of all the KM_KBP_BT_CHAR items
241- // Extract the context into 'ctxt' for transforms to process
242- if (!!transforms) {
243- // if no transforms, no reason to do this extraction (ctxt will remain empty)
244- auto &cp = state->context ();
245- // We're only interested in as much of the context as is a KM_KBP_BT_CHAR.
246- uint8_t last_type = KM_KBP_BT_UNKNOWN;
247- for (auto c = cp.rbegin (); c != cp.rend (); c++) {
248- last_type = c->type ;
249- if (last_type != KM_KBP_BT_CHAR) {
250- // not a char, stop here
251- // TODO-LDML: markers?
252- break ;
253- }
254- ctxt.emplace_front (1 , c->character );
255- // extract UTF-32 to 1 or 2 UTF-16 chars in a string
256- }
257- }
258-
259238 // Look up the key
260239 const std::u16string str = keys.lookup (vk, modifier_state);
240+
261241 if (str.empty ()) {
262- // not found, so pass the keystroke on to the Engine
242+ // no key was found, so pass the keystroke on to the Engine
263243 state->actions ().push_invalidate_context ();
264244 state->actions ().push_emit_keystroke ();
265245 break ; // ----- commit and exit
266246 }
267- // found the correct string - push it into the context and actions
247+
248+ // found a string - push it into the context and actions
249+ // we convert it here instead of using the emit_text() overload
250+ // so that we don't have to reconvert it inside the transform code.
268251 const std::u32string str32 = kmx::u16string_to_u32string (str);
269- for (const auto &ch : str32) {
270- state->context ().push_character (ch);
271- state->actions ().push_character (ch);
272- }
273- // Now process transforms
274- // Process the transforms
275- if (!!transforms) {
276- // add the newly added char to ctxt
277- ctxt.push_back (str32);
278252
253+ if (!transforms) {
254+ // No transforms: just emit the string.
255+ emit_text (state, str32);
256+ } else {
257+ // Process transforms here
258+ /* *
259+ * a copy of the current/changed context, for transform use.
260+ *
261+ */
262+ std::u32string ctxtstr;
263+ (void )context_to_string (state, ctxtstr);
264+ // add the newly added key output to ctxtstr
265+ ctxtstr.append (str32);
266+
267+ /* * the output buffer for transforms */
279268 std::u32string outputString;
280269
281- // TODO-LDML: unroll ctxt into a str. Would be better to have transforms be able to process a vector
282- std::u32string ctxtstr;
283- for (const auto &ch : ctxt) {
284- ctxtstr.append (ch);
285- }
286- // check if the context matched, and if so how much (at the end)
270+ // apply the transform, get how much matched (at the end)
287271 const size_t matchedContext = transforms->apply (ctxtstr, outputString);
288272
289- if (matchedContext > 0 ) {
290- // Found something.
291- // Now, clear out the old context
292- for (size_t i = 0 ; i < matchedContext; i++) {
293- state->context ().pop_back (); // Pop off last
294- auto deletedChar = ctxt[ctxt.size () - i - 1 ][0 ];
295- state->actions ().push_backspace (KM_KBP_BT_CHAR, deletedChar); // Cause prior char to be removed
273+ if (matchedContext == 0 ) {
274+ // No match, just emit the original string
275+ emit_text (state, str32);
276+ } else {
277+ // We have a match.
278+
279+ ctxtstr.resize (ctxtstr.length () - str32.length ());
280+ /* * how many chars of the context we need to clear */
281+ auto charsToDelete = matchedContext - str32.length (); /* we don't need to clear the output of the current key */
282+
283+ /* * how many context items need to be removed */
284+ size_t contextRemoved = 0 ;
285+ for (auto c = state->context ().rbegin (); charsToDelete > 0 && c != state->context ().rend (); c++, contextRemoved++) {
286+ /* * last char of context */
287+ km_kbp_usv lastCtx = ctxtstr.back ();
288+ uint8_t type = c->type ;
289+ assert (type == KM_KBP_BT_CHAR || type == KM_KBP_BT_MARKER);
290+ if (type == KM_KBP_BT_CHAR) {
291+ // single char, drop it
292+ charsToDelete--;
293+ assert (c->character == lastCtx);
294+ ctxtstr.pop_back ();
295+ state->actions ().push_backspace (KM_KBP_BT_CHAR, lastCtx); // Cause prior char to be removed
296+ } else if (type == KM_KBP_BT_MARKER) {
297+ // it's a marker, 'worth' 3 uchars
298+ assert (charsToDelete >= 3 );
299+ assert (lastCtx == c->marker ); // end of list
300+ charsToDelete -= 3 ;
301+ // pop off the three-part sentinel string
302+ ctxtstr.pop_back ();
303+ ctxtstr.pop_back ();
304+ ctxtstr.pop_back ();
305+ // push a special backspace to delete the marker
306+ state->actions ().push_backspace (KM_KBP_BT_MARKER, c->marker );
307+ }
296308 }
297- // Now, add in the updated text
298- for (const auto &ch : outputString ) {
299- state-> context (). push_character (ch);
300- state->actions ().push_character (ch );
309+ // now, pop the right number of context items
310+ for (size_t i = 0 ; i < contextRemoved; i++ ) {
311+ // we don't pop during the above loop because the iterator gets confused
312+ state->context ().pop_back ( );
301313 }
302- }
303- }
304- }
305- }
314+ // Now, add in the updated text. This will convert UC_SENTINEL, etc back to marker actions.
315+ emit_text (state, outputString);
316+ // If we needed it further. we could update ctxtstr here:
317+ // ctxtstr.append(outputString);
318+ // ... but it is no longer needed at this point.
319+ } // end of transform match
320+ } // end of processing transforms
321+ } // end of processing a 'normal' vk
322+ } // end of switch
306323 // end of normal processing: commit and exit
307324 state->actions ().commit ();
308325 } catch (std::bad_alloc &) {
@@ -336,5 +353,67 @@ km_kbp_status ldml_processor::validate() const {
336353 return _valid ? KM_KBP_STATUS_OK : KM_KBP_STATUS_INVALID_KEYBOARD;
337354}
338355
356+ void
357+ ldml_processor::emit_text (km_kbp_state *state, const std::u16string &str) {
358+ const std::u32string str32 = kmx::u16string_to_u32string (str);
359+ emit_text (state, str32);
360+ }
361+
362+ void
363+ ldml_processor::emit_text (km_kbp_state *state, const std::u32string &str) {
364+ for (auto it = str.begin (); it < str.end (); it++) {
365+ const auto ch = *it;
366+ // If we are at the start of a sequence:
367+ if (ch == LDML_UC_SENTINEL) {
368+ it++; // consume LDML_UC_SENTINEL
369+ // TODO-LDML: Might assert if a malformed sequence is included- "should not happen"?
370+ assert (it < str.end ());
371+ // verify that the next char is LDML_MARKER_CODE
372+ assert (*it == LDML_MARKER_CODE);
373+ it++; // consume LDML_MARKER_CODE
374+ assert (it < str.end ());
375+ const auto marker_no = *it;
376+ assert (marker_no >= LDML_MARKER_MIN_INDEX);
377+ assert (marker_no <= LDML_MARKER_ANY_INDEX);
378+ emit_marker (state, marker_no);
379+ } else {
380+ emit_text (state, ch);
381+ }
382+ }
383+ }
384+
385+ void
386+ ldml_processor::emit_text (km_kbp_state *state, km_kbp_usv ch) {
387+ assert (ch != LDML_UC_SENTINEL);
388+ state->context ().push_character (ch);
389+ state->actions ().push_character (ch);
390+ }
391+
392+ void ldml_processor::emit_marker (km_kbp_state *state, KMX_DWORD marker_no) {
393+ // OK, push the marker
394+ state->actions ().push_marker (marker_no);
395+ state->context ().push_marker (marker_no);
396+ }
397+
398+ size_t
399+ ldml_processor::context_to_string (km_kbp_state *state, std::u32string &str) {
400+ str.clear ();
401+ auto &cp = state->context ();
402+ size_t ctxlen = 0 ; // TODO-LDML: is this needed?
403+ uint8_t last_type = KM_KBP_BT_UNKNOWN;
404+ for (auto c = cp.rbegin (); c != cp.rend (); c++, ctxlen++) {
405+ last_type = c->type ;
406+ if (last_type == KM_KBP_BT_CHAR) {
407+ str.insert (0 , 1 , c->character );
408+ } else if (last_type == KM_KBP_BT_MARKER) {
409+ prepend_marker (str, c->marker );
410+ } else {
411+ break ;
412+ }
413+ }
414+ return ctxlen; // consumed the entire context buffer.
415+ }
416+
417+
339418} // namespace kbp
340419} // namespace km
0 commit comments