@@ -100,6 +100,9 @@ enum PowerPart {
100
100
// "fluid-ounce-imperial".
101
101
constexpr int32_t kSimpleUnitOffset = 512 ;
102
102
103
+ // Trie value offset for aliases, e.g. "portion" replaced by "part"
104
+ constexpr int32_t kAliasOffset = 51200 ; // This will give a very big space for the units ids.
105
+
103
106
const struct UnitPrefixStrings {
104
107
const char * const string;
105
108
UMeasurePrefix value;
@@ -255,6 +258,58 @@ class SimpleUnitIdentifiersSink : public icu::ResourceSink {
255
258
int32_t outIndex;
256
259
};
257
260
261
+ class UnitAliasesSink : public icu ::ResourceSink {
262
+ public:
263
+ /* *
264
+ * Constructor.
265
+ * @param unitAliases The output vector of unit alias identifiers (CharString).
266
+ * @param unitReplacements The output vector of replacements for the unit aliases (CharString).
267
+ */
268
+ explicit UnitAliasesSink (MaybeStackVector<CharString> &unitAliases,
269
+ MaybeStackVector<CharString> &unitReplacements)
270
+ : unitAliases(unitAliases), unitReplacements(unitReplacements) {}
271
+
272
+ /* *
273
+ * Adds the unit alias key and its replacement to the unitAliases and unitReplacements vectors.
274
+ * @param key The unit alias identifier (e.g., "meter-and-liter").
275
+ * @param value Should be a ResourceTable value containing the replacement,
276
+ * when ures_getAllChildrenWithFallback() is called correctly for this sink.
277
+ * @param noFallback Ignored.
278
+ * @param status The standard ICU error code output parameter.
279
+ */
280
+ void put (const char *key, ResourceValue &value, UBool /* noFallback*/ ,
281
+ UErrorCode &status) override {
282
+ if (U_FAILURE (status)) return ;
283
+
284
+ // Add the unit alias key to the unitAliases vector
285
+ int32_t keyLen = static_cast <int32_t >(uprv_strlen (key));
286
+ unitAliases.emplaceBackAndCheckErrorCode (status)->append (key, keyLen, status);
287
+ if (U_FAILURE (status)) {
288
+ return ;
289
+ }
290
+
291
+ // Find the replacement for this unit alias from the alias table resource.
292
+ ResourceTable aliasTable = value.getTable (status);
293
+ if (U_FAILURE (status)) {
294
+ return ;
295
+ }
296
+
297
+ if (!aliasTable.findValue (" replacement" , value)) {
298
+ status = U_MISSING_RESOURCE_ERROR;
299
+ return ;
300
+ }
301
+
302
+ int32_t len;
303
+ const char16_t *uReplacement = value.getString (len, status);
304
+ unitReplacements.emplaceBackAndCheckErrorCode (status)->appendInvariantChars (uReplacement,
305
+ len, status);
306
+ }
307
+
308
+ private:
309
+ MaybeStackVector<CharString> &unitAliases;
310
+ MaybeStackVector<CharString> &unitReplacements;
311
+ };
312
+
258
313
/* *
259
314
* A ResourceSink that collects information from `unitQuantities` in the `units`
260
315
* resource to provide key->value lookups from base unit to category, as well as
@@ -321,6 +376,11 @@ class CategoriesSink : public icu::ResourceSink {
321
376
322
377
icu::UInitOnce gUnitExtrasInitOnce {};
323
378
379
+ // Array of unit aliases.
380
+ MaybeStackVector<icu::CharString> gUnitAliases ;
381
+ // Array of replacements for the unit aliases.
382
+ MaybeStackVector<icu::CharString> gUnitReplacements ;
383
+
324
384
// Array of simple unit IDs.
325
385
//
326
386
// The array memory itself is owned by this pointer, but the individual char* in
@@ -453,6 +513,24 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
453
513
simpleUnitsCount, b, kSimpleUnitOffset );
454
514
ures_getAllItemsWithFallback (unitsBundle.getAlias (), " convertUnits" , identifierSink, status);
455
515
516
+ // Populate gUnitAliases and gUnitReplacements.
517
+ LocalUResourceBundlePointer aliasBundle (ures_open (U_ICUDATA_ALIAS, " metadata" , &status));
518
+ if (U_FAILURE (status)) {
519
+ return ;
520
+ }
521
+ UnitAliasesSink aliasSink (gUnitAliases , gUnitReplacements );
522
+ ures_getAllChildrenWithFallback (aliasBundle.getAlias (), " alias/unit" , aliasSink, status);
523
+ if (U_FAILURE (status)) {
524
+ return ;
525
+ }
526
+
527
+ for (int32_t i = 0 ; i < gUnitAliases .length (); i++) {
528
+ b.add (gUnitAliases [i]->data (), i + kAliasOffset , status);
529
+ if (U_FAILURE (status)) {
530
+ return ;
531
+ }
532
+ }
533
+
456
534
// Build the CharsTrie
457
535
// TODO: Use SLOW or FAST here?
458
536
StringPiece result = b.buildStringPiece (USTRINGTRIE_BUILD_FAST, status);
@@ -479,8 +557,10 @@ class Token {
479
557
this ->fType = TYPE_INITIAL_COMPOUND_PART;
480
558
} else if (fMatch < kSimpleUnitOffset ) {
481
559
this ->fType = TYPE_POWER_PART;
482
- } else {
560
+ } else if ( fMatch < kAliasOffset ) {
483
561
this ->fType = TYPE_SIMPLE_UNIT;
562
+ } else {
563
+ this ->fType = TYPE_ALIAS;
484
564
}
485
565
}
486
566
@@ -505,6 +585,7 @@ class Token {
505
585
TYPE_POWER_PART,
506
586
TYPE_SIMPLE_UNIT,
507
587
TYPE_CONSTANT_DENOMINATOR,
588
+ TYPE_ALIAS,
508
589
};
509
590
510
591
// Calling getType() is invalid, resulting in an assertion failure, if Token
@@ -551,6 +632,11 @@ class Token {
551
632
return fMatch - kSimpleUnitOffset ;
552
633
}
553
634
635
+ int32_t getAliasIndex () const {
636
+ U_ASSERT (getType () == TYPE_ALIAS);
637
+ return static_cast <int32_t >(fMatch - kAliasOffset );
638
+ }
639
+
554
640
// TODO: Consider moving this to a separate utility class.
555
641
// Utility function to parse a string into an unsigned long value.
556
642
// The value must be a positive integer within the range [1, INT64_MAX].
@@ -673,6 +759,10 @@ class Parser {
673
759
}
674
760
675
761
if (singleUnitOrConstant.isConstantDenominator ()) {
762
+ if (result.constantDenominator > 0 ) {
763
+ status = kUnitIdentifierSyntaxError ;
764
+ return result;
765
+ }
676
766
result.constantDenominator = singleUnitOrConstant.getConstantDenominator ();
677
767
result.complexity = UMEASURE_UNIT_COMPOUND;
678
768
continue ;
@@ -728,6 +818,9 @@ class Parser {
728
818
StringPiece fSource ;
729
819
BytesTrie fTrie ;
730
820
821
+ // Storage for modified source string when aliases are expanded
822
+ CharString fModifiedSource ;
823
+
731
824
// Set to true when we've seen a "-per-" or a "per-", after which all units
732
825
// are in the denominator. Until we find an "-and-", at which point the
733
826
// identifier is invalid pending TODO(CLDR-13701).
@@ -830,6 +923,19 @@ class Parser {
830
923
return {};
831
924
}
832
925
926
+ // Handles the case where the alias replacement begins with "per-".
927
+ // For example:
928
+ // if the alias is "permeter" and the replacement is "per-meter".
929
+ // NOTE: This case does not currently exist in CLDR, but this code anticipates possible future
930
+ // additions.
931
+ if (token.getType () == Token::TYPE_ALIAS) {
932
+ processAlias (token, status);
933
+ token = nextToken (status);
934
+ if (U_FAILURE (status)) {
935
+ return {};
936
+ }
937
+ }
938
+
833
939
fJustSawPer = false ;
834
940
835
941
if (atStart) {
@@ -923,6 +1029,10 @@ class Parser {
923
1029
singleUnitResult.index = token.getSimpleUnitIndex ();
924
1030
break ;
925
1031
1032
+ case Token::TYPE_ALIAS:
1033
+ processAlias (token, status);
1034
+ break ;
1035
+
926
1036
default :
927
1037
status = kUnitIdentifierSyntaxError ;
928
1038
return {};
@@ -945,6 +1055,48 @@ class Parser {
945
1055
946
1056
return SingleUnitOrConstant::singleUnitValue (singleUnitResult);
947
1057
}
1058
+
1059
+ private:
1060
+ /* *
1061
+ * Helper function to process alias replacement.
1062
+ *
1063
+ * @param token The token of TYPE_ALIAS to process
1064
+ * @param status ICU error code
1065
+ */
1066
+ void processAlias (const Token &token, UErrorCode &status) {
1067
+ if (U_FAILURE (status)) {
1068
+ return ;
1069
+ }
1070
+
1071
+ auto aliasIndex = token.getAliasIndex ();
1072
+ if (aliasIndex < 0 || aliasIndex >= gUnitAliases .length () ||
1073
+ aliasIndex >= gUnitReplacements .length ()) {
1074
+ status = kUnitIdentifierSyntaxError ;
1075
+ return ;
1076
+ }
1077
+
1078
+ auto replacement = gUnitReplacements [aliasIndex];
1079
+
1080
+ // Create new source string: replacement + remaining unparsed portion
1081
+ fModifiedSource .clear ();
1082
+ fModifiedSource .append (replacement->data (), replacement->length (), status);
1083
+
1084
+ // Add the remaining unparsed portion of fSource which starts from fIndex
1085
+ if (fIndex < fSource .length ()) {
1086
+ StringPiece remaining = fSource .substr (fIndex );
1087
+ fModifiedSource .append (remaining.data (), remaining.length (), status);
1088
+ }
1089
+
1090
+ if (U_FAILURE (status)) {
1091
+ return ;
1092
+ }
1093
+
1094
+ // Update parser state with new source and reset index
1095
+ fSource = StringPiece (fModifiedSource .data (), fModifiedSource .length ());
1096
+ fIndex = 0 ;
1097
+
1098
+ return ;
1099
+ }
948
1100
};
949
1101
950
1102
// Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray.
0 commit comments