Skip to content

Commit 975b8d5

Browse files
committed
Use Unicode 17.0 name for split property
Unicode 16.0 created a subcategory of hyphens containing just U+2010 "HYPHEN". It did not name the subcategory, so I called it U2010. Unicode 17.0 names it HH (and adds more code points to it), so this commit renames it to HH in preparation for 17.0.
1 parent 6d40608 commit 975b8d5

File tree

8 files changed

+283
-286
lines changed

8 files changed

+283
-286
lines changed

charclass_invlists.inc

Lines changed: 265 additions & 266 deletions
Large diffs are not rendered by default.

lib/unicore/mktables

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15642,8 +15642,10 @@ END
1564215642
};
1564315643
}
1564415644

15645-
if ($v_version ge 16.0.0) {
15646-
push @lb_splits, { name => 'U2010',
15645+
# Version 17.0 formalized this split into HH, and added other code points
15646+
# to it
15647+
if ($v_version eq 16.0.0) {
15648+
push @lb_splits, { name => 'HH',
1564715649
ranges => Range_List->new(Initialize
1564815650
=> Range->new(0x2010, 0x2010))
1564915651
};

lib/unicore/uni_keywords.pl

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

regcharclass.h

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

regen/mk_invlists.pl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,7 +2499,7 @@ ()
24992499
match_return => 'LB_NOBREAK',
25002500
rule => '19a',
25012501
},
2502-
LB_various_then_HY_or_U2010_v_AL => {
2502+
LB_various_then_HY_or_HH_v_AL => {
25032503
enum => $lb_enum++,
25042504
match_return => 'LB_NOBREAK',
25052505
rule => '20a',
@@ -2799,10 +2799,10 @@ ()
27992799

28002800
# LB20a Do not break after a word-initial hyphen.
28012801
# ( sot | BK | CR | LF | NL | SP | ZW | CB | GL )
2802-
# ( HY | [\x{2010} ] )
2802+
# ( HY | HH )
28032803
# × AL
2804-
$dfa = 'LB_various_then_HY_or_U2010_v_AL';
2805-
add_lb_dfa($_, 'AL', $dfa, '20a') for qw(HY U2010);
2804+
$dfa = 'LB_various_then_HY_or_HH_v_AL';
2805+
add_lb_dfa($_, 'AL', $dfa, '20a') for qw(HY HH);
28062806

28072807
# LB21 Do not break before hyphen-minus, other hyphens, fixed-width
28082808
# spaces, small kana, and other non-starters, or after acute accents.
@@ -3309,7 +3309,7 @@ sub token_name {
33093309
33103310
_Perl_GCB,EDGE,E_Base,E_Base_GAZ,E_Modifier,ExtPict_XX,Glue_After_Zwj,InCB_Consonant,InCB_Consonant_XX,InCB_Extend,InCB_Extend_EX,InCB_Linker,InCB_Linker_EX,LV,Prepend,Regional_Indicator,SpacingMark,ZWJ
33113311
3312-
_Perl_LB,EDGE,Aksara,Aksara_Prebase,Aksara_Start,AK,AP,Close_Parenthesis,Cn_ExtPict_ExtPict_ID,Contingent_Break,Dotted_Circle,Dotted_Circle_AL,East_Asian_CP,East_Asian_OP,E_Base,E_Modifier,H2,H3,Hebrew_Letter,JL,JT,JV,Next_Line,Pf_QU,Pi_QU,Regional_Indicator,U2010,VF,VI,Virama,Virama_Final,Word_Joiner,ZWJ
3312+
_Perl_LB,EDGE,Aksara,Aksara_Prebase,Aksara_Start,AK,AP,Close_Parenthesis,Cn_ExtPict_ExtPict_ID,Contingent_Break,Dotted_Circle,Dotted_Circle_AL,East_Asian_CP,East_Asian_OP,E_Base,E_Modifier,H2,H3,Hebrew_Letter,JL,JT,JV,Next_Line,Pf_QU,Pi_QU,Regional_Indicator,HH,VF,VI,Virama,Virama_Final,Word_Joiner,ZWJ
33133313
33143314
_Perl_SB,EDGE,CR,Extend,LF,SContinue
33153315

regexec.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5605,10 +5605,10 @@ S_isLB(pTHX_ LB_enum before,
56055605
|| isLB_CP(prev);
56065606
break;
56075607

5608-
case LB_various_then_HY_or_U2010_v_AL:
5608+
case LB_various_then_HY_or_HH_v_AL:
56095609
/* LB20a Do not break after a word-initial hyphen.
56105610
* ( sot | BK | CR | LF | NL | SP | ZW | CB | GL )
5611-
* ( HY | [\x{2010} ] )
5611+
* ( HY | HH )
56125612
* × AL */
56135613
prev = backup_one_LB_but_over_CM_ZWJ(strbeg, &prev_pos,
56145614
utf8_target);

regexp_constants.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
* e97e4259d0d20fab150b9c7b4b28abfae5cd78ca97e7f4ac6ed20d685d5f4a7c lib/unicore/LineBreak.txt
5151
* 9953f0fcebf5ea8091c5c581e4df0e43f20d2533c84ccca7987a9bb819a896a8 lib/unicore/NameAliases.txt
5252
* 4ff660cb922480cd5aab9a689b1a6905d0a54575baf9967d0f1e00ac866f04dd lib/unicore/NamedSequences.txt
53-
* d811971453e7075e1ad56fb1b301eece5aa80757b81f6156e74a1bfb3ae5ceb1 lib/unicore/NormTest.txt
5453
* 53d614508e2a0b2305a8aa21cd60d993de9326cdf65993660dfcce4503548583 lib/unicore/PropList.txt
5554
* 440fd3e5460b9bfe31da67b6f923992e1989d31fe2ed91e091c4b8f8e2620bf9 lib/unicore/PropValueAliases.txt
5655
* 33a9f2266ad6b8e8de05c0ea3dfac411ac62cf8839ff1c94057471e4c5f6a2b3 lib/unicore/PropertyAliases.txt
@@ -79,9 +78,9 @@
7978
* 4be1c18e7b121d951018065b453bb05083f624c8f905ce8be7c0f354c9097c95 lib/unicore/extracted/DLineBreak.txt
8079
* 786833e0a3f5ec0c0cd0940e4c15f730f3a92163f354ecd7dede28a70c0fa892 lib/unicore/extracted/DNumType.txt
8180
* 00b43cc5c9b86a834f82389c4537f103e652821387daa556f0bd220f6c23007e lib/unicore/extracted/DNumValues.txt
82-
* 1c7436cfa4a07f19f29fe0e499b49bcf6662be766b18f1a97cce9693703c4ca5 lib/unicore/mktables
81+
* 5ce9c6296c80a4ccd654a8e8fed6a028b5ad95050afb821a819f98dd77a11c22 lib/unicore/mktables
8382
* 63f771c327e92574fbd77919586079c38f669058a5e6b67ccec385ef8fcde882 lib/unicore/version
8483
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
8584
* c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl
86-
* 6f140fe16685fe5d0e81e2984af81342aff5eaba309991002eaca94d032b2ecc regen/mk_invlists.pl
85+
* ec2bea650f1338fcd1f2f5b0e589f89e653c180b3994468788fd343a2869ced7 regen/mk_invlists.pl
8786
* ex: set ro ft=c: */

uni_keywords.h

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)