Skip to content

Commit 43afd6f

Browse files
committed
ICU-23214 generate brkitr/root.txt lstm from data
- update cldr-icu.md - see TODOs in ICU-23215
1 parent a9f9b5c commit 43afd6f

File tree

5 files changed

+16
-19
lines changed

5 files changed

+16
-19
lines changed

docs/processes/cldr-icu.md

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -297,17 +297,8 @@ ant copy-cldr-testdata
297297
5d. NOP
298298
(This step has been subsumed into 5c above)
299299

300-
5e. For now, manually re-add the `lstm` entries in `data/brkitr/root.txt`
301-
```sh
302-
open $ICU4C_DIR/source/data/brkitr/root.txt
303-
```
304-
Paste the following block after the dictionaries block and before the final closing '}':
305-
```
306-
lstm{
307-
Thai{"Thai_graphclust_model4_heavy.res"}
308-
Mymr{"Burmese_graphclust_model5_heavy.res"}
309-
}
310-
```
300+
5e. NOP
301+
(This step is no longer necessary; see [ICU-23215](https://unicode-org.atlassian.net/browse/ICU-23215) for details.)
311302

312303
5f. Update hard-coded lists in ICU
313304

@@ -499,7 +490,7 @@ rebuilding of other kinds of data and/or code. For example:
499490

500491
If you see a failure such as
501492
```
502-
MeasureUnitTest testCLDRUnitAvailability Failure (MeasureUnitTest.java:3410) : Unit present in CLDR but not available via constant in MeasureUnit: speed-beaufort
493+
MeasureUnitTest testCLDRUnitAvailability Failure (MeasureUnitTest.java:3410) : Unit present in CLDR but not available via constant in MeasureUnit: speed-beaufort
503494
```
504495
then you will need to update the C and J library and test code for new measurement
505496
units, see the procedure at

icu4c/source/data/brkitr/root.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ root{
2323
Thai:process(dependency){"thaidict.dict"}
2424
}
2525
lstm{
26-
Thai{"Thai_graphclust_model4_heavy.res"}
2726
Mymr{"Burmese_graphclust_model5_heavy.res"}
27+
Thai{"Thai_graphclust_model4_heavy.res"}
2828
}
2929
}

icu4c/source/data/xml/brkitr/root.xml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
<ldml>
1414
<identity>
1515
<version number="$Revision$"/>
16-
<language type="root"/>
16+
<language type="root"/>
1717
</identity>
1818
<special xmlns:icu="http://www.icu-project.org/">
1919
<icu:breakIteratorData>
@@ -36,13 +36,10 @@
3636
<icu:dictionary type="Mymr" icu:dependency="burmesedict.dict"/>
3737
<icu:dictionary type="Thai" icu:dependency="thaidict.dict"/>
3838
</icu:dictionaries>
39-
<!--
4039
<icu:lstm>
4140
<icu:lstmdata type="Thai" icu:dependency="Thai_graphclust_model4_heavy.res"/>
4241
<icu:lstmdata type="Mymr" icu:dependency="Burmese_graphclust_model5_heavy.res"/>
4342
</icu:lstm>
44-
-->
4543
</icu:breakIteratorData>
4644
</special>
4745
</ldml>
48-

icu4c/source/test/testdata/BUILDRULES.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def generate_rb(config, io, common_vars):
4444
"testaliases",
4545
"testempty",
4646
"testtypes",
47-
# LSTM models
47+
# LSTM models - TODO ICU-23215 this should be dynamic
4848
"Thai_graphclust_model4_heavy",
4949
"Thai_codepoints_exclusive_model5_heavy",
5050
"Burmese_graphclust_model5_heavy"

tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/mapper/BreakIteratorMapper.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ private void addExtension(CldrValue v) {
110110

111111
private void addLstmdata(CldrValue v) {
112112
//System.out.println("addLstmdata: " + v.toString()); // debug
113-
addDependency(
113+
addLstmDependency(
114114
getDependencyName(v),
115115
LSTMDATA_TYPE.valueFrom(v),
116116
LSTMDATA_DEP.optionalValueFrom(v));
@@ -123,6 +123,15 @@ private void addDependency(String name, String type, Optional<String> dependency
123123
dependency.orElseThrow(() -> new IllegalArgumentException("missing dependency")));
124124
}
125125

126+
private void addLstmDependency(String name, String type, Optional<String> dependency) {
127+
// The key should be type + ":process(dependency)", but that's not what ICU expects.
128+
// BUILDRULES.py hard codes the inclusion of the .res files, see the TODO ticket.
129+
System.out.println("addLstmDependency: name " + name + ", type " + type + ", dependency " + dependency + " - see ICU-23215");
130+
icuData.add(
131+
RbPath.of(name, type /* + ":process(dependency)") */), // TODO ICU-23215
132+
dependency.orElseThrow(() -> new IllegalArgumentException("missing lstm dependency")));
133+
}
134+
126135
// Must match the BOUNDARIES or DICTIONARY path.
127136
private static String getDependencyName(CldrValue value) {
128137
return stripXmlNamespace(value.getPath().getParent().getName());

0 commit comments

Comments
 (0)