@@ -392,18 +392,74 @@ reorder_group::apply(std::u32string &str) const {
392392 return applied;
393393}
394394
395- transform_entry::transform_entry (const transform_entry &other) :
396- fFrom (other.fFrom ), fTo (other.fTo ), fFromPattern (nullptr ) {
395+ transform_entry::transform_entry (const transform_entry &other)
396+ : fFrom (other.fFrom ), fTo (other.fTo ), fFromPattern (nullptr ), fMapFromStrId (other.fMapFromStrId ),
397+ fMapToStrId (other.fMapToStrId ), fMapFromList(other.fMapFromList ), fMapToList(other.fMapToList ) {
397398 if (other.fFromPattern ) {
398399 // clone pattern
399400 fFromPattern .reset (other.fFromPattern ->clone ());
400401 }
401402}
402403
403- transform_entry::transform_entry (const std::u32string &from, const std::u32string &to) : fFrom (from), fTo (to) {
404+ transform_entry::transform_entry (const std::u32string &from, const std::u32string &to)
405+ : fFrom(from), fTo(to), fFromPattern(nullptr ), fMapFromStrId(), fMapToStrId(), fMapFromList(), fMapToList() {
404406 assert (!fFrom .empty ()); // TODO-LDML: should not happen?
405407
408+ init ();
409+ }
410+
411+ // TODO-LDML: How do we return errors from here?
412+ transform_entry::transform_entry (
413+ const std::u32string &from,
414+ const std::u32string &to,
415+ KMX_DWORD mapFrom,
416+ KMX_DWORD mapTo,
417+ const kmx::kmx_plus &kplus)
418+ : fFrom(from), fTo(to), fFromPattern(nullptr ), fMapFromStrId(mapFrom), fMapToStrId(mapTo) {
419+ assert (!fFrom .empty ()); // TODO-LDML: should not happen?
420+ assert ((fMapFromStrId == 0 ) == (fMapToStrId == 0 )); // we have both or we have neither.
421+ assert (kplus.strs != nullptr );
422+ assert (kplus.vars != nullptr );
423+ assert (kplus.elem != nullptr );
424+ init ();
425+
426+ // setup mapFrom
427+ if (fMapFromStrId != 0 ) {
428+ // Note: if we need the variable name it is available as follows,
429+ // but isn't needed for normal processing. Could be useful for debug messages.
430+ // auto mapFrom = kplus.strs->get(fMapFromStrId);
431+ // auto mapTo = kplus.strs->get(fMapToStrId);
432+
433+ // get the vars
434+ auto *fromVar = kplus.vars ->findByStringId (fMapFromStrId );
435+ auto *toVar = kplus.vars ->findByStringId (fMapToStrId );
436+ assert (fromVar != nullptr );
437+ assert (toVar != nullptr );
438+
439+
440+ // get the element lists
441+ assert (fromVar->type == LDML_VARS_ENTRY_TYPE_SET);
442+ assert (toVar->type == LDML_VARS_ENTRY_TYPE_SET);
443+ KMX_DWORD fromLength, toLength;
444+ auto *fromList = kplus.elem ->getElementList (fromVar->elem , fromLength);
445+ auto *toList = kplus.elem ->getElementList (toVar->elem , toLength);
446+ assert (fromLength == toLength);
447+ assert (fromList != nullptr );
448+ assert (toList != nullptr );
449+
450+ // populate the deques from the lists
451+ fMapFromList = fromList->loadAsStringList (fromLength, *(kplus.strs ));
452+ fMapToList = toList->loadAsStringList (toLength, *(kplus.strs ));
453+ // did we get the expected items?
454+ assert (fMapFromList .size () == fromLength);
455+ assert (fMapToList .size () == toLength);
456+ }
457+ }
458+
459+ void
460+ transform_entry::init () {
406461 if (!fFrom .empty ()) {
462+ // TODO-LDML: if we have mapFrom, may need to do other processing.
407463 const std::u16string patstr = km::kbp::kmx::u32string_to_u16string (fFrom );
408464 UErrorCode status = U_ZERO_ERROR;
409465 /* const */ icu::UnicodeString patustr = icu::UnicodeString (patstr.data (), (int32_t )patstr.length ());
@@ -421,7 +477,7 @@ transform_entry::apply(const std::u32string &input, std::u32string &output) cons
421477 // TODO-LDML: Also, we could cache the u16 string at the transformGroup level or higher.
422478 UErrorCode status = U_ZERO_ERROR;
423479 const std::u16string matchstr = km::kbp::kmx::u32string_to_u16string (input);
424- icu::UnicodeString matchustr = icu::UnicodeString (matchstr.data (), (int32_t )matchstr.length ());
480+ icu::UnicodeString matchustr = icu::UnicodeString (matchstr.data (), (int32_t )matchstr.length ());
425481 // TODO-LDML: create a new Matcher every time. These could be cached and reset.
426482 std::unique_ptr<icu::RegexMatcher> matcher (fFromPattern ->matcher (matchustr, status));
427483 assert (U_SUCCESS (status));
@@ -438,25 +494,65 @@ transform_entry::apply(const std::u32string &input, std::u32string &output) cons
438494 // extract..
439495 const icu::UnicodeString substr = matchustr.tempSubStringBetween (matchStart, matchEnd);
440496 // preflight to UTF-32 to get length
441- UErrorCode substrStatus = U_ZERO_ERROR;
497+ UErrorCode substrStatus = U_ZERO_ERROR; // throwaway status
498+ // we need the UTF-32 matchLen for our return.
442499 auto matchLen = substr.toUTF32 (nullptr , 0 , substrStatus);
500+
501+ // should have matched something.
443502 assert (matchLen > 0 );
444- if (matchLen == 0 ) {
445- return 0 ;
446- }
447- // Now, we have a matchLen.
448503
449504 // now, do the replace.
450- // Convert the fTo into u16 TODO-LDML (we could cache this?)
451- const std::u16string rstr = km::kbp::kmx::u32string_to_u16string (fTo );
452- icu::UnicodeString rustr = icu::UnicodeString (rstr.data (), (int32_t )rstr.length ());
453- // This replace will apply $1, $2 etc. TODO-LDML it will NOT handle mapFrom or mapTo.
505+
506+ /* * this is the 'to' or other replacement string.*/
507+ icu::UnicodeString rustr;
508+ if (fMapFromStrId == 0 ) {
509+ // Normal case: not a map.
510+ // This replace will apply $1, $2 etc.
511+ // Convert the fTo into u16 TODO-LDML (we could cache this?)
512+ const std::u16string rstr = km::kbp::kmx::u32string_to_u16string (fTo );
513+ rustr = icu::UnicodeString (rstr.data (), (int32_t )rstr.length ());
514+ } else {
515+ // Set map case: mapping from/to
516+
517+ // we actually need the group(1) string here.
518+ // this is only the content in parenthesis ()
519+ icu::UnicodeString group1 = matcher->group (1 , status);
520+ assert (U_SUCCESS (status)); // TODO-LDML: could be a malformed from pattern
521+ // now, how long is group1 in UTF-32, hmm?
522+ UErrorCode preflightStatus = U_ZERO_ERROR; // throwaway status
523+ auto group1Len = group1.toUTF32 (nullptr , 0 , preflightStatus);
524+ char32_t *s = new char32_t [group1Len + 1 ];
525+ assert (s != nullptr ); // TODO-LDML: OOM
526+ // convert
527+ substr.toUTF32 ((UChar32 *)s, group1Len + 1 , status);
528+ assert (U_SUCCESS (status));
529+ std::u32string match32 (s, group1Len); // taken from just group1
530+ // clean up buffer
531+ delete [] s;
532+
533+ // Now we're ready to do the actual mapping.
534+
535+ // 1., we need to find the index in the source set.
536+ auto matchIndex = findIndexFrom (match32);
537+ assert (matchIndex != -1L ); // TODO-LDML: not matching shouldn't happen, the regex wouldn't have matched.
538+ // we already asserted on load that the from and to sets have the same cardinality.
539+
540+ // 2. get the target string, convert to utf-16
541+ // we use the same matchIndex that was just found
542+ const std::u16string rstr = km::kbp::kmx::u32string_to_u16string (fMapToList .at (matchIndex));
543+
544+ // 3. update the UnicodeString for replacement
545+ rustr = icu::UnicodeString (rstr.data (), (int32_t )rstr.length ());
546+ // and we return to the regular code flow.
547+ }
548+ // here we replace the match output.
454549 icu::UnicodeString entireOutput = matcher->replaceFirst (rustr, status);
455- assert (U_SUCCESS (status));
550+ assert (U_SUCCESS (status)); // TODO-LDML: could fail here due to bad input (syntax err)
551+
456552 // entireOutput includes all of 'input', but modified. Need to substring it.
457553 icu::UnicodeString outu = entireOutput.tempSubString (matchStart);
458554
459- // Special case if there's no output
555+ // Special case if there's no output, save some allocs
460556 if (outu.length () == 0 ) {
461557 output.clear ();
462558 } else {
@@ -479,6 +575,20 @@ transform_entry::apply(const std::u32string &input, std::u32string &output) cons
479575 return matchLen;
480576}
481577
578+ int32_t transform_entry::findIndexFrom (const std::u32string &match) const {
579+ return findIndex (match, fMapFromList );
580+ }
581+
582+ int32_t transform_entry::findIndex (const std::u32string &match, const std::deque<std::u32string> list) {
583+ int32_t index = 0 ;
584+ for (auto e = list.begin (); e < list.end (); e++, index++) {
585+ if (match == *e) {
586+ return index;
587+ }
588+ }
589+ return -1 ; // not found
590+ }
591+
482592any_group::any_group (const transform_group &g) : type(any_group_type::transform), transform(g), reorder() {
483593}
484594
@@ -686,16 +796,9 @@ transforms::load(
686796 const kmx::COMP_KMXPLUS_TRAN_TRANSFORM *element = tranHelper.getTransform (group->index + itemNumber);
687797 const std::u32string fromStr = kmx::u16string_to_u32string (kplus.strs ->get (element->from ));
688798 const std::u32string toStr = kmx::u16string_to_u32string (kplus.strs ->get (element->to ));
689- std::u16string mapFrom, mapTo;
690-
691- if (element->mapFrom && element->mapTo ) {
692- // strings: variable name of from/to
693- // TODO-LDML: not implemented
694- mapFrom = kplus.strs ->get (element->mapFrom );
695- mapTo = kplus.strs ->get (element->mapTo );
696- }
697-
698- newGroup.emplace_back (fromStr, toStr /* ,mapFrom, mapTo */ ); // creating a transform_entry
799+ KMX_DWORD mapFrom = element->mapFrom ; // copy, because of alignment
800+ KMX_DWORD mapTo = element->mapTo ; // copy, because of alignment
801+ newGroup.emplace_back (fromStr, toStr, mapFrom, mapTo, kplus); // creating a transform_entry
699802 }
700803 transforms->addGroup (newGroup);
701804 } else if (group->type == LDML_TRAN_GROUP_TYPE_REORDER) {
0 commit comments