Skip to content

Commit fb1691a

Browse files
macchiati and srl295 authored
CLDR-17459 Add units for grammar (#3682)
- BRS: move to -z BUILD Co-authored-by: Steven R. Loomis <[email protected]>
1 parent 999aa06 commit fb1691a

File tree

5 files changed

+118
-20
lines changed

5 files changed

+118
-20
lines changed

.github/workflows/maven.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ jobs:
187187
env:
188188
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
189189
- name: run CLDR console check
190-
run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z FINAL_TESTING
190+
run: java -DCLDR_GITHUB_ANNOTATIONS=true -DCLDR_DIR=$(pwd) -Xmx6g -jar tools/cldr-code/target/cldr-code.jar check -S common,seed -e -z BUILD
191191
deploy:
192192
# don't run deploy on manual builds!
193193
if: github.repository == 'unicode-org/cldr' && github.event_name == 'push' && github.ref == 'refs/heads/main' && github.event.inputs.git-ref == ''

common/supplemental/units.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ For terms of use, see http://www.unicode.org/copyright.html
289289
<convertUnit source='pascal' baseUnit='kilogram-per-meter-square-second' systems="si metric prefixable"/>
290290
<convertUnit source='bar' baseUnit='kilogram-per-meter-square-second' factor='100000' systems="si_acceptable metric prefixable"/>
291291
<convertUnit source='atmosphere' baseUnit='kilogram-per-meter-square-second' factor='101325' systems="ussystem uksystem "/>
292-
<convertUnit source='gasoline-energy-density' baseUnit='kilogram-per-meter-square-second' factor='33.705 * 3600 * 1000/gal_to_m3' systems="metric_adjacent ussystem uksystem" description="Constructed so that 1 gallon-gasoline-energy-density = 33.705 kWh as per https://www3.epa.gov/otaq/gvg/learn-more-technology.htm"/>
292+
<convertUnit source='gasoline-energy-density' baseUnit='kilogram-per-meter-square-second' factor='33.705 * 3600 * 1000/gal_to_m3' systems="ussystem uksystem" description="Constructed so that 1 gallon-gasoline-energy-density = 33.705 kWh as per https://www3.epa.gov/otaq/gvg/learn-more-technology.htm"/>
293293

294294
<!-- pressure-per-length -->
295295
<convertUnit source='ofhg' baseUnit='kilogram-per-square-meter-square-second' factor='13595.1*gravity' systems="metric_adjacent uksystem ussystem"/>

tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -745,21 +745,19 @@ public static Set<String> getGrammarLocales() {
745745
"month",
746746
"year");
747747

748+
// To see a list of the results for double-checking, run TestUnits with TestUnitsToTranslate -v
748749
static final Set<String> EXCLUDE_GRAMMAR =
749750
Set.of(
750-
"point",
751-
"dunam",
752-
"dot",
753-
"astronomical-unit",
754-
"nautical-mile",
755-
"knot",
756-
"dalton",
751+
"dot", // fallback is pixel
752+
"dot-per-centimeter", // fallback is pixel
753+
"dunam", // language-specific
754+
"astronomical-unit", // specialized
755+
"nautical-mile", // US/UK specific
756+
"knot", // US/UK specific
757+
"dalton", // specialized
758+
"electronvolt", // specialized
757759
"kilocalorie",
758-
"electronvolt",
759-
// The following may be reinstated after 45.
760-
"dot-per-centimeter",
761-
"millimeter-ofhg",
762-
"milligram-ofglucose-per-deciliter");
760+
"point");
763761

764762
public static Set<String> getSpecialsToTranslate() {
765763
return INCLUDE_OTHER;

tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
import com.google.common.collect.Multimap;
1414
import com.google.common.collect.Sets;
1515
import com.google.common.collect.TreeMultimap;
16+
import com.ibm.icu.impl.Row;
1617
import com.ibm.icu.impl.Row.R2;
18+
import com.ibm.icu.impl.Row.R4;
1719
import com.ibm.icu.lang.UCharacter;
1820
import com.ibm.icu.number.UnlocalizedNumberFormatter;
1921
import com.ibm.icu.text.PluralRules;
@@ -29,6 +31,7 @@
2931
import java.util.Collections;
3032
import java.util.Comparator;
3133
import java.util.EnumSet;
34+
import java.util.HashSet;
3235
import java.util.Iterator;
3336
import java.util.LinkedHashMap;
3437
import java.util.LinkedHashSet;
@@ -48,7 +51,9 @@
4851
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
4952
import org.unicode.cldr.util.Rational.FormatStyle;
5053
import org.unicode.cldr.util.Rational.RationalParser;
54+
import org.unicode.cldr.util.StandardCodes.LstrType;
5155
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
56+
import org.unicode.cldr.util.Validity.Status;
5257

5358
public class UnitConverter implements Freezable<UnitConverter> {
5459
public static boolean DEBUG = false;
@@ -80,7 +85,7 @@ public class UnitConverter implements Freezable<UnitConverter> {
8085
private Multimap<String, UnitSystem> sourceToSystems = TreeMultimap.create();
8186
private Set<String> baseUnits;
8287
private Multimap<String, Continuation> continuations = TreeMultimap.create();
83-
private Comparator<String> quantityComparator;
88+
private MapComparator<String> quantityComparator;
8489

8590
private Map<String, String> fixDenormalized;
8691
private ImmutableMap<String, UnitId> idToUnitId;
@@ -92,6 +97,17 @@ public class UnitConverter implements Freezable<UnitConverter> {
9297

9398
public TargetInfoComparator targetInfoComparator;
9499

100+
private final MapComparator<String> LongUnitIdOrder = new MapComparator<>();
101+
private final MapComparator<String> ShortUnitIdOrder = new MapComparator<>();
102+
103+
public Comparator<String> getLongUnitIdComparator() {
104+
return LongUnitIdOrder;
105+
}
106+
107+
public Comparator<String> getShortUnitIdComparator() {
108+
return ShortUnitIdOrder;
109+
}
110+
95111
/** Warning: ordering is important; determines the normalized output */
96112
public static final Set<String> BASE_UNITS =
97113
ImmutableSet.of(
@@ -198,6 +214,74 @@ public UnitConverter freeze() {
198214
}
199215
}
200216
idToUnitId = ImmutableMap.copyOf(_idToUnitId);
217+
218+
// build the map comparators
219+
220+
Set<R4<Integer, UnitSystem, Rational, String>> all = new TreeSet<>();
221+
Set<String> baseSeen = new HashSet<>();
222+
for (String longUnit :
223+
Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
224+
Output<String> base = new Output<>();
225+
String shortUnit = getShortId(longUnit);
226+
ConversionInfo conversionInfo = parseUnitId(shortUnit, base, false);
227+
if (conversionInfo == null) {
228+
if (longUnit.equals("temperature-generic")) {
229+
conversionInfo = parseUnitId("kelvin", base, false);
230+
}
231+
}
232+
String quantity = getQuantityFromUnit(base.value, false);
233+
Integer quantityNumericOrder = quantityComparator.getNumericOrder(quantity);
234+
if (quantityNumericOrder == null) { // try the inverse
235+
if (base.value.equals("meter-per-cubic-meter")) { // HACK
236+
quantityNumericOrder = quantityComparator.getNumericOrder("consumption");
237+
}
238+
if (quantityNumericOrder == null) {
239+
throw new IllegalArgumentException(
240+
"Missing quantity for: " + base.value + ", " + shortUnit);
241+
}
242+
}
243+
244+
final EnumSet<UnitSystem> systems = EnumSet.copyOf(getSystemsEnum(shortUnit));
245+
246+
// to sort the right items together, put together a sort key
247+
UnitSystem sortingSystem = systems.iterator().next();
248+
switch (sortingSystem) {
249+
case metric:
250+
case si:
251+
case si_acceptable:
252+
case astronomical:
253+
case metric_adjacent:
254+
case person_age:
255+
sortingSystem = UnitSystem.metric;
256+
break;
257+
// country specific
258+
case other:
259+
case ussystem:
260+
case uksystem:
261+
case jpsystem:
262+
sortingSystem = UnitSystem.other;
263+
break;
264+
default:
265+
throw new IllegalArgumentException(
266+
"Add new unitSystem to a grouping: " + sortingSystem);
267+
}
268+
R4<Integer, UnitSystem, Rational, String> sortKey =
269+
Row.of(
270+
quantityNumericOrder,
271+
sortingSystem,
272+
conversionInfo.factor,
273+
shortUnit);
274+
all.add(sortKey);
275+
}
276+
LongUnitIdOrder.setErrorOnMissing(true);
277+
ShortUnitIdOrder.setErrorOnMissing(true);
278+
for (R4<Integer, UnitSystem, Rational, String> item : all) {
279+
String shortId = item.get3();
280+
ShortUnitIdOrder.add(shortId);
281+
LongUnitIdOrder.add(getLongId(shortId));
282+
}
283+
LongUnitIdOrder.freeze();
284+
ShortUnitIdOrder.freeze();
201285
}
202286
return this;
203287
}
@@ -649,7 +733,7 @@ private void addToSourceToTarget(
649733
}
650734
}
651735

652-
private Comparator<String> getQuantityComparator(
736+
private MapComparator<String> getQuantityComparator(
653737
Map<String, String> baseUnitToQuantity2, Map<String, String> baseUnitToStatus2) {
654738
// We want to sort all the quantities so that we have a natural ordering within compound
655739
// units. So kilowatt-hour, not hour-kilowatt.

tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3544,7 +3544,22 @@ enum TranslationStatus {
35443544
has_grammar_X,
35453545
add_grammar,
35463546
skip_grammar,
3547-
skip_trans
3547+
skip_trans("\t— specific langs poss.)");
3548+
3549+
private TranslationStatus() {
3550+
outName = name();
3551+
}
3552+
3553+
private final String outName;
3554+
3555+
private TranslationStatus(String extra) {
3556+
outName = name() + extra;
3557+
}
3558+
3559+
@Override
3560+
public String toString() {
3561+
return outName;
3562+
}
35483563
}
35493564

35503565
/**
@@ -3555,7 +3570,8 @@ public void TestUnitsToTranslate() {
35553570
Set<String> toTranslate = GrammarInfo.getUnitsToAddGrammar();
35563571
final CLDRConfig config = CLDRConfig.getInstance();
35573572
final UnitConverter converter = config.getSupplementalDataInfo().getUnitConverter();
3558-
Map<String, TranslationStatus> shortUnitToTranslationStatus40 = new TreeMap<>();
3573+
Map<String, TranslationStatus> shortUnitToTranslationStatus40 =
3574+
new TreeMap<>(converter.getShortUnitIdComparator());
35593575
for (String longUnit :
35603576
Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
35613577
String shortUnit = converter.getShortId(longUnit);
@@ -3588,9 +3604,9 @@ public void TestUnitsToTranslate() {
35883604
TranslationStatus status40 = entry.getValue();
35893605
if (isVerbose())
35903606
System.out.println(
3591-
shortUnit
3607+
converter.getQuantityFromUnit(shortUnit, false)
35923608
+ "\t"
3593-
+ converter.getQuantityFromUnit(shortUnit, false)
3609+
+ shortUnit
35943610
+ "\t"
35953611
+ converter.getSystemsEnum(shortUnit)
35963612
+ "\t"

0 commit comments

Comments
 (0)