Skip to content

Commit 200f3d7

Browse files
authored
Support Line_Break property for unicodeset_parse. (#6396)
Allow the unicodeset parser to parse [:Line_Break=xxxx:].
1 parent c6b8a32 commit 200f3d7

File tree

3 files changed

+34
-1
lines changed

3 files changed

+34
-1
lines changed

components/experimental/src/transliterate/compile/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,12 +230,14 @@ impl RuleCollection {
230230
+ DataProvider<PropertyEnumCanonicalCombiningClassV1>
231231
+ DataProvider<PropertyEnumGeneralCategoryV1>
232232
+ DataProvider<PropertyEnumGraphemeClusterBreakV1>
233+
+ DataProvider<PropertyEnumLineBreakV1>
233234
+ DataProvider<PropertyEnumScriptV1>
234235
+ DataProvider<PropertyEnumSentenceBreakV1>
235236
+ DataProvider<PropertyEnumWordBreakV1>
236237
+ DataProvider<PropertyNameParseCanonicalCombiningClassV1>
237238
+ DataProvider<PropertyNameParseGeneralCategoryMaskV1>
238239
+ DataProvider<PropertyNameParseGraphemeClusterBreakV1>
240+
+ DataProvider<PropertyNameParseLineBreakV1>
239241
+ DataProvider<PropertyNameParseScriptV1>
240242
+ DataProvider<PropertyNameParseSentenceBreakV1>
241243
+ DataProvider<PropertyNameParseWordBreakV1>
@@ -328,12 +330,14 @@ where
328330
+ DataProvider<PropertyEnumCanonicalCombiningClassV1>
329331
+ DataProvider<PropertyEnumGeneralCategoryV1>
330332
+ DataProvider<PropertyEnumGraphemeClusterBreakV1>
333+
+ DataProvider<PropertyEnumLineBreakV1>
331334
+ DataProvider<PropertyEnumScriptV1>
332335
+ DataProvider<PropertyEnumSentenceBreakV1>
333336
+ DataProvider<PropertyEnumWordBreakV1>
334337
+ DataProvider<PropertyNameParseCanonicalCombiningClassV1>
335338
+ DataProvider<PropertyNameParseGeneralCategoryMaskV1>
336339
+ DataProvider<PropertyNameParseGraphemeClusterBreakV1>
340+
+ DataProvider<PropertyNameParseLineBreakV1>
337341
+ DataProvider<PropertyNameParseScriptV1>
338342
+ DataProvider<PropertyNameParseSentenceBreakV1>
339343
+ DataProvider<PropertyNameParseWordBreakV1>
@@ -498,12 +502,14 @@ where
498502
+ DataProvider<PropertyEnumCanonicalCombiningClassV1>
499503
+ DataProvider<PropertyEnumGeneralCategoryV1>
500504
+ DataProvider<PropertyEnumGraphemeClusterBreakV1>
505+
+ DataProvider<PropertyEnumLineBreakV1>
501506
+ DataProvider<PropertyEnumScriptV1>
502507
+ DataProvider<PropertyEnumSentenceBreakV1>
503508
+ DataProvider<PropertyEnumWordBreakV1>
504509
+ DataProvider<PropertyNameParseCanonicalCombiningClassV1>
505510
+ DataProvider<PropertyNameParseGeneralCategoryMaskV1>
506511
+ DataProvider<PropertyNameParseGraphemeClusterBreakV1>
512+
+ DataProvider<PropertyNameParseLineBreakV1>
507513
+ DataProvider<PropertyNameParseScriptV1>
508514
+ DataProvider<PropertyNameParseSentenceBreakV1>
509515
+ DataProvider<PropertyNameParseWordBreakV1>

components/experimental/src/transliterate/compile/parse.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,12 +315,14 @@ where
315315
+ DataProvider<PropertyEnumCanonicalCombiningClassV1>
316316
+ DataProvider<PropertyEnumGeneralCategoryV1>
317317
+ DataProvider<PropertyEnumGraphemeClusterBreakV1>
318+
+ DataProvider<PropertyEnumLineBreakV1>
318319
+ DataProvider<PropertyEnumScriptV1>
319320
+ DataProvider<PropertyEnumSentenceBreakV1>
320321
+ DataProvider<PropertyEnumWordBreakV1>
321322
+ DataProvider<PropertyNameParseCanonicalCombiningClassV1>
322323
+ DataProvider<PropertyNameParseGeneralCategoryMaskV1>
323324
+ DataProvider<PropertyNameParseGraphemeClusterBreakV1>
325+
+ DataProvider<PropertyNameParseLineBreakV1>
324326
+ DataProvider<PropertyNameParseScriptV1>
325327
+ DataProvider<PropertyNameParseSentenceBreakV1>
326328
+ DataProvider<PropertyNameParseWordBreakV1>

components/experimental/src/unicodeset_parse/parse.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use icu_properties::script::ScriptWithExtensions;
1818
use icu_properties::{
1919
props::{
2020
CanonicalCombiningClass, EnumeratedProperty, GeneralCategory, GeneralCategoryGroup,
21-
GraphemeClusterBreak, Script, SentenceBreak, WordBreak,
21+
GraphemeClusterBreak, LineBreak, Script, SentenceBreak, WordBreak,
2222
},
2323
CodePointMapData,
2424
};
@@ -487,12 +487,14 @@ where
487487
+ DataProvider<PropertyEnumCanonicalCombiningClassV1>
488488
+ DataProvider<PropertyEnumGeneralCategoryV1>
489489
+ DataProvider<PropertyEnumGraphemeClusterBreakV1>
490+
+ DataProvider<PropertyEnumLineBreakV1>
490491
+ DataProvider<PropertyEnumScriptV1>
491492
+ DataProvider<PropertyEnumSentenceBreakV1>
492493
+ DataProvider<PropertyEnumWordBreakV1>
493494
+ DataProvider<PropertyNameParseCanonicalCombiningClassV1>
494495
+ DataProvider<PropertyNameParseGeneralCategoryMaskV1>
495496
+ DataProvider<PropertyNameParseGraphemeClusterBreakV1>
497+
+ DataProvider<PropertyNameParseLineBreakV1>
496498
+ DataProvider<PropertyNameParseScriptV1>
497499
+ DataProvider<PropertyNameParseSentenceBreakV1>
498500
+ DataProvider<PropertyNameParseWordBreakV1>
@@ -1128,6 +1130,8 @@ where
11281130
let mut try_scx = Err(PEK::UnknownProperty.into());
11291131
// contains a value for the Grapheme_Cluster_Break property that needs to be tried
11301132
let mut try_gcb = Err(PEK::UnknownProperty.into());
1133+
// contains a value for the Line_Break property that needs to be tried
1134+
let mut try_lb = Err(PEK::UnknownProperty.into());
11311135
// contains a value for the Sentence_Break property that needs to be tried
11321136
let mut try_sb = Err(PEK::UnknownProperty.into());
11331137
// contains a value for the Word_Break property that needs to be tried
@@ -1150,6 +1154,7 @@ where
11501154
GraphemeClusterBreak::NAME | GraphemeClusterBreak::SHORT_NAME => {
11511155
try_gcb = Ok(value)
11521156
}
1157+
LineBreak::NAME | LineBreak::SHORT_NAME => try_lb = Ok(value),
11531158
Script::NAME | Script::SHORT_NAME => try_sc = Ok(value),
11541159
SentenceBreak::NAME | SentenceBreak::SHORT_NAME => try_sb = Ok(value),
11551160
WordBreak::NAME | WordBreak::SHORT_NAME => try_wb = Ok(value),
@@ -1187,6 +1192,7 @@ where
11871192
.or_else(|_| try_scx.and_then(|value| self.try_load_script_extensions_set(value)))
11881193
.or_else(|_| try_binary.and_then(|value| self.try_load_ecma262_binary_set(value)))
11891194
.or_else(|_| try_gcb.and_then(|value| self.try_load_grapheme_cluster_break_set(value)))
1195+
.or_else(|_| try_lb.and_then(|value| self.try_load_line_break_set(value)))
11901196
.or_else(|_| try_sb.and_then(|value| self.try_load_sentence_break_set(value)))
11911197
.or_else(|_| try_wb.and_then(|value| self.try_load_word_break_set(value)))
11921198
.or_else(|_| try_ccc.and_then(|value| self.try_load_ccc_set(value)))
@@ -1419,6 +1425,21 @@ where
14191425
Ok(())
14201426
}
14211427

1428+
/// Adds to `self.single_set` the set of code points that the `LineBreak`
/// code point map associates with the property value named by `name`.
///
/// `name` is resolved to a `LineBreak` value through
/// `PropertyParser::<LineBreak>` using `get_loose`.
///
/// # Errors
///
/// - `PEK::Internal` if the property-name parser or the code point map
///   cannot be constructed from `self.property_provider` (the underlying
///   error is discarded).
/// - `PEK::UnknownProperty` if `get_loose` returns no value for `name`.
fn try_load_line_break_set(&mut self, name: &str) -> Result<()> {
1429+
// Build the name -> value parser for Line_Break; any construction failure
// is reported as an internal error.
let parser = PropertyParser::<LineBreak>::try_new_unstable(self.property_provider)
1430+
.map_err(|_| PEK::Internal)?;
1431+
// Resolve the textual value name; an unrecognized name is reported as
// `PEK::UnknownProperty`.
let lb_value = parser
1432+
.as_borrowed()
1433+
.get_loose(name)
1434+
.ok_or(PEK::UnknownProperty)?;
1435+
// TODO(#3550): This could be cached; does not depend on name.
1436+
let property_map = CodePointMapData::<LineBreak>::try_new_unstable(self.property_provider)
1437+
.map_err(|_| PEK::Internal)?;
1438+
// Collect the code points mapped to the resolved value and merge them into
// the set being built.
let set = property_map.as_borrowed().get_set_for_value(lb_value);
1439+
self.single_set.add_set(&set.to_code_point_inversion_list());
1440+
Ok(())
1441+
}
1442+
14221443
fn try_load_sentence_break_set(&mut self, name: &str) -> Result<()> {
14231444
let parser = PropertyParser::<SentenceBreak>::try_new_unstable(self.property_provider)
14241445
.map_err(|_| PEK::Internal)?;
@@ -1670,12 +1691,14 @@ where
16701691
+ DataProvider<PropertyEnumCanonicalCombiningClassV1>
16711692
+ DataProvider<PropertyEnumGeneralCategoryV1>
16721693
+ DataProvider<PropertyEnumGraphemeClusterBreakV1>
1694+
+ DataProvider<PropertyEnumLineBreakV1>
16731695
+ DataProvider<PropertyEnumScriptV1>
16741696
+ DataProvider<PropertyEnumSentenceBreakV1>
16751697
+ DataProvider<PropertyEnumWordBreakV1>
16761698
+ DataProvider<PropertyNameParseCanonicalCombiningClassV1>
16771699
+ DataProvider<PropertyNameParseGeneralCategoryMaskV1>
16781700
+ DataProvider<PropertyNameParseGraphemeClusterBreakV1>
1701+
+ DataProvider<PropertyNameParseLineBreakV1>
16791702
+ DataProvider<PropertyNameParseScriptV1>
16801703
+ DataProvider<PropertyNameParseSentenceBreakV1>
16811704
+ DataProvider<PropertyNameParseWordBreakV1>
@@ -1786,12 +1809,14 @@ where
17861809
+ DataProvider<PropertyEnumCanonicalCombiningClassV1>
17871810
+ DataProvider<PropertyEnumGeneralCategoryV1>
17881811
+ DataProvider<PropertyEnumGraphemeClusterBreakV1>
1812+
+ DataProvider<PropertyEnumLineBreakV1>
17891813
+ DataProvider<PropertyEnumScriptV1>
17901814
+ DataProvider<PropertyEnumSentenceBreakV1>
17911815
+ DataProvider<PropertyEnumWordBreakV1>
17921816
+ DataProvider<PropertyNameParseCanonicalCombiningClassV1>
17931817
+ DataProvider<PropertyNameParseGeneralCategoryMaskV1>
17941818
+ DataProvider<PropertyNameParseGraphemeClusterBreakV1>
1819+
+ DataProvider<PropertyNameParseLineBreakV1>
17951820
+ DataProvider<PropertyNameParseScriptV1>
17961821
+ DataProvider<PropertyNameParseSentenceBreakV1>
17971822
+ DataProvider<PropertyNameParseWordBreakV1>

0 commit comments

Comments
 (0)