Skip to content

Commit 95af74d

Browse files
committed
syntax: update to Unicode 14
Closes #878
1 parent 8fe3716 commit 95af74d

15 files changed

+2012
-1153
lines changed

regex-syntax/src/unicode.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,7 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>> {
604604
("V12_0", age::V12_0),
605605
("V12_1", age::V12_1),
606606
("V13_0", age::V13_0),
607+
("V14_0", age::V14_0),
607608
];
608609
assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
609610

regex-syntax/src/unicode_tables/age.rs

Lines changed: 140 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
22
//
3-
// ucd-generate age ucd-13.0.0 --chars
3+
// ucd-generate age /tmp/ucd --chars
44
//
5-
// Unicode version: 13.0.0.
5+
// Unicode version: 14.0.0.
66
//
7-
// ucd-generate 0.2.8 is available on crates.io.
7+
// ucd-generate 0.2.11 is available on crates.io.
88

99
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
1010
("V10_0", V10_0),
1111
("V11_0", V11_0),
1212
("V12_0", V12_0),
1313
("V12_1", V12_1),
1414
("V13_0", V13_0),
15+
("V14_0", V14_0),
1516
("V1_1", V1_1),
1617
("V2_0", V2_0),
1718
("V2_1", V2_1),
@@ -203,69 +204,150 @@ pub const V12_0: &'static [(char, char)] = &[
203204
pub const V12_1: &'static [(char, char)] = &[('㋿', '㋿')];
204205

205206
pub const V13_0: &'static [(char, char)] = &[
206-
('\u{8be}', '\u{8c7}'),
207+
('\u{8be}', '\u{8c7}'),
207208
('\u{b55}', '\u{b55}'),
208-
('\u{d04}', '\u{d04}'),
209+
('\u{d04}', '\u{d04}'),
209210
('\u{d81}', '\u{d81}'),
210211
('\u{1abf}', '\u{1ac0}'),
211-
('\u{2b97}', '\u{2b97}'),
212-
('\u{2e50}', '\u{2e52}'),
213-
('\u{31bb}', '\u{31bf}'),
214-
('\u{4db6}', '\u{4dbf}'),
215-
('\u{9ff0}', '\u{9ffc}'),
216-
('\u{a7c7}', '\u{a7ca}'),
217-
('\u{a7f5}', '\u{a7f6}'),
212+
('\u{2b97}', '\u{2b97}'),
213+
('\u{2e50}', '\u{2e52}'),
214+
('\u{31bb}', '\u{31bf}'),
215+
('\u{4db6}', '䶿'),
216+
('\u{9ff0}', '\u{9ffc}'),
217+
('\u{a7c7}', '\u{a7ca}'),
218+
('\u{a7f5}', '\u{a7f6}'),
218219
('\u{a82c}', '\u{a82c}'),
219-
('\u{ab68}', '\u{ab6b}'),
220-
('\u{1019c}', '\u{1019c}'),
221-
('\u{10e80}', '\u{10ea9}'),
222-
('\u{10eab}', '\u{10ead}'),
223-
('\u{10eb0}', '\u{10eb1}'),
224-
('\u{10fb0}', '\u{10fcb}'),
225-
('\u{11147}', '\u{11147}'),
226-
('\u{111ce}', '\u{111cf}'),
227-
('\u{1145a}', '\u{1145a}'),
228-
('\u{11460}', '\u{11461}'),
229-
('\u{11900}', '\u{11906}'),
230-
('\u{11909}', '\u{11909}'),
231-
('\u{1190c}', '\u{11913}'),
232-
('\u{11915}', '\u{11916}'),
233-
('\u{11918}', '\u{11935}'),
234-
('\u{11937}', '\u{11938}'),
235-
('\u{1193b}', '\u{11946}'),
236-
('\u{11950}', '\u{11959}'),
237-
('\u{11fb0}', '\u{11fb0}'),
220+
('\u{ab68}', '\u{ab6b}'),
221+
('𐆜', '𐆜'),
222+
('𐺀', '𐺩'),
223+
('\u{10eab}', '𐺭'),
224+
('𐺰', '𐺱'),
225+
('𐾰', '𐿋'),
226+
('𑅇', '𑅇'),
227+
('𑇎', '\u{111cf}'),
228+
('𑑚', '𑑚'),
229+
('𑑠', '𑑡'),
230+
('𑤀', '𑤆'),
231+
('𑤉', '𑤉'),
232+
('𑤌', '𑤓'),
233+
('𑤕', '𑤖'),
234+
('𑤘', '𑤵'),
235+
('𑤷', '𑤸'),
236+
('\u{1193b}', '𑥆'),
237+
('𑥐', '𑥙'),
238+
('𑾰', '𑾰'),
238239
('\u{16fe4}', '\u{16fe4}'),
239-
('\u{16ff0}', '\u{16ff1}'),
240-
('\u{18af3}', '\u{18cd5}'),
241-
('\u{18d00}', '\u{18d08}'),
242-
('\u{1f10d}', '\u{1f10f}'),
243-
('\u{1f16d}', '\u{1f16f}'),
244-
('\u{1f1ad}', '\u{1f1ad}'),
245-
('\u{1f6d6}', '\u{1f6d7}'),
246-
('\u{1f6fb}', '\u{1f6fc}'),
247-
('\u{1f8b0}', '\u{1f8b1}'),
248-
('\u{1f90c}', '\u{1f90c}'),
249-
('\u{1f972}', '\u{1f972}'),
250-
('\u{1f977}', '\u{1f978}'),
251-
('\u{1f9a3}', '\u{1f9a4}'),
252-
('\u{1f9ab}', '\u{1f9ad}'),
253-
('\u{1f9cb}', '\u{1f9cb}'),
254-
('\u{1fa74}', '\u{1fa74}'),
255-
('\u{1fa83}', '\u{1fa86}'),
256-
('\u{1fa96}', '\u{1faa8}'),
257-
('\u{1fab0}', '\u{1fab6}'),
258-
('\u{1fac0}', '\u{1fac2}'),
259-
('\u{1fad0}', '\u{1fad6}'),
260-
('\u{1fb00}', '\u{1fb92}'),
261-
('\u{1fb94}', '\u{1fbca}'),
262-
('\u{1fbf0}', '\u{1fbf9}'),
263-
('\u{2a6d7}', '\u{2a6dd}'),
264-
('\u{30000}', '\u{3134a}'),
240+
('𖿰', '𖿱'),
241+
('𘫳', '𘳕'),
242+
('𘴀', '𘴈'),
243+
('🄍', '🄏'),
244+
('🅭', '🅯'),
245+
('🆭', '🆭'),
246+
('🛖', '🛗'),
247+
('🛻', '🛼'),
248+
('🢰', '🢱'),
249+
('🤌', '🤌'),
250+
('🥲', '🥲'),
251+
('🥷', '🥸'),
252+
('🦣', '🦤'),
253+
('🦫', '🦭'),
254+
('🧋', '🧋'),
255+
('🩴', '🩴'),
256+
('🪃', '🪆'),
257+
('🪖', '🪨'),
258+
('🪰', '🪶'),
259+
('🫀', '🫂'),
260+
('🫐', '🫖'),
261+
('🬀', '🮒'),
262+
('🮔', '🯊'),
263+
('🯰', '🯹'),
264+
('𪛗', '𪛝'),
265+
('𰀀', '𱍊'),
266+
];
267+
268+
pub const V14_0: &'static [(char, char)] = &[
269+
('؝', '؝'),
270+
('ࡰ', 'ࢎ'),
271+
('\u{890}', '\u{891}'),
272+
('\u{898}', '\u{89f}'),
273+
('ࢵ', 'ࢵ'),
274+
('ࣈ', '\u{8d2}'),
275+
('\u{c3c}', '\u{c3c}'),
276+
('ౝ', 'ౝ'),
277+
('ೝ', 'ೝ'),
278+
('ᜍ', 'ᜍ'),
279+
('᜕', '᜕'),
280+
('ᜟ', 'ᜟ'),
281+
('\u{180f}', '\u{180f}'),
282+
('\u{1ac1}', '\u{1ace}'),
283+
('ᭌ', 'ᭌ'),
284+
('᭽', '᭾'),
285+
('\u{1dfa}', '\u{1dfa}'),
286+
('⃀', '⃀'),
287+
('Ⱟ', 'Ⱟ'),
288+
('ⱟ', 'ⱟ'),
289+
('⹓', '⹝'),
290+
('鿽', '鿿'),
291+
('Ꟁ', 'ꟁ'),
292+
('Ꟑ', 'ꟑ'),
293+
('ꟓ', 'ꟓ'),
294+
('ꟕ', 'ꟙ'),
295+
('ꟲ', 'ꟴ'),
296+
('﯂', '﯂'),
297+
('﵀', '﵏'),
298+
('﷏', '﷏'),
299+
('﷾', '﷿'),
300+
('𐕰', '𐕺'),
301+
('𐕼', '𐖊'),
302+
('𐖌', '𐖒'),
303+
('𐖔', '𐖕'),
304+
('𐖗', '𐖡'),
305+
('𐖣', '𐖱'),
306+
('𐖳', '𐖹'),
307+
('𐖻', '𐖼'),
308+
('𐞀', '𐞅'),
309+
('𐞇', '𐞰'),
310+
('𐞲', '𐞺'),
311+
('𐽰', '𐾉'),
312+
('\u{11070}', '𑁵'),
313+
('\u{110c2}', '\u{110c2}'),
314+
('𑚹', '𑚹'),
315+
('𑝀', '𑝆'),
316+
('𑪰', '𑪿'),
317+
('𒾐', '𒿲'),
318+
('𖩰', '𖪾'),
319+
('𖫀', '𖫉'),
320+
('𚿰', '𚿳'),
321+
('𚿵', '𚿻'),
322+
('𚿽', '𚿾'),
323+
('𛄟', '𛄢'),
324+
('\u{1cf00}', '\u{1cf2d}'),
325+
('\u{1cf30}', '\u{1cf46}'),
326+
('𜽐', '𜿃'),
327+
('𝇩', '𝇪'),
328+
('𝼀', '𝼞'),
329+
('𞊐', '\u{1e2ae}'),
330+
('𞟠', '𞟦'),
331+
('𞟨', '𞟫'),
332+
('𞟭', '𞟮'),
333+
('𞟰', '𞟾'),
334+
('🛝', '🛟'),
335+
('🟰', '🟰'),
336+
('🥹', '🥹'),
337+
('🧌', '🧌'),
338+
('🩻', '🩼'),
339+
('🪩', '🪬'),
340+
('🪷', '🪺'),
341+
('🫃', '🫅'),
342+
('🫗', '🫙'),
343+
('🫠', '🫧'),
344+
('🫰', '🫶'),
345+
('𪛞', '𪛟'),
346+
('𫜵', '𫜸'),
265347
];
266348

267349
pub const V1_1: &'static [(char, char)] = &[
268-
('\u{0}', 'ǵ'),
350+
('\0', 'ǵ'),
269351
('Ǻ', 'ȗ'),
270352
('ɐ', 'ʨ'),
271353
('ʰ', '˞'),

regex-syntax/src/unicode_tables/case_folding_simple.rs

Lines changed: 89 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
22
//
3-
// ucd-generate case-folding-simple ucd-13.0.0 --chars --all-pairs
3+
// ucd-generate case-folding-simple /tmp/ucd --chars --all-pairs
44
//
5-
// Unicode version: 13.0.0.
5+
// Unicode version: 14.0.0.
66
//
7-
// ucd-generate 0.2.8 is available on crates.io.
7+
// ucd-generate 0.2.11 is available on crates.io.
88

99
pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
1010
('A', &['a']),
@@ -1781,6 +1781,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
17811781
('Ⱜ', &['ⱜ']),
17821782
('Ⱝ', &['ⱝ']),
17831783
('Ⱞ', &['ⱞ']),
1784+
('Ⱟ', &['ⱟ']),
17841785
('ⰰ', &['Ⰰ']),
17851786
('ⰱ', &['Ⰱ']),
17861787
('ⰲ', &['Ⰲ']),
@@ -1828,6 +1829,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
18281829
('ⱜ', &['Ⱜ']),
18291830
('ⱝ', &['Ⱝ']),
18301831
('ⱞ', &['Ⱞ']),
1832+
('ⱟ', &['Ⱟ']),
18311833
('Ⱡ', &['ⱡ']),
18321834
('ⱡ', &['Ⱡ']),
18331835
('Ɫ', &['ɫ']),
@@ -2211,17 +2213,25 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
22112213
('ꞽ', &['Ꞽ']),
22122214
('Ꞿ', &['ꞿ']),
22132215
('ꞿ', &['Ꞿ']),
2216+
('Ꟁ', &['ꟁ']),
2217+
('ꟁ', &['Ꟁ']),
22142218
('Ꟃ', &['ꟃ']),
22152219
('ꟃ', &['Ꟃ']),
22162220
('Ꞔ', &['ꞔ']),
22172221
('Ʂ', &['ʂ']),
22182222
('Ᶎ', &['ᶎ']),
2219-
('\u{a7c7}', &['\u{a7c8}']),
2220-
('\u{a7c8}', &['\u{a7c7}']),
2221-
('\u{a7c9}', &['\u{a7ca}']),
2222-
('\u{a7ca}', &['\u{a7c9}']),
2223-
('\u{a7f5}', &['\u{a7f6}']),
2224-
('\u{a7f6}', &['\u{a7f5}']),
2223+
('Ꟈ', &['ꟈ']),
2224+
('ꟈ', &['Ꟈ']),
2225+
('Ꟊ', &['ꟊ']),
2226+
('ꟊ', &['Ꟊ']),
2227+
('Ꟑ', &['ꟑ']),
2228+
('ꟑ', &['Ꟑ']),
2229+
('Ꟗ', &['ꟗ']),
2230+
('ꟗ', &['Ꟗ']),
2231+
('Ꟙ', &['ꟙ']),
2232+
('ꟙ', &['Ꟙ']),
2233+
('Ꟶ', &['ꟶ']),
2234+
('ꟶ', &['Ꟶ']),
22252235
('ꭓ', &['Ꭓ']),
22262236
('ꭰ', &['Ꭰ']),
22272237
('ꭱ', &['Ꭱ']),
@@ -2507,6 +2517,76 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
25072517
('𐓹', &['𐓑']),
25082518
('𐓺', &['𐓒']),
25092519
('𐓻', &['𐓓']),
2520+
('𐕰', &['𐖗']),
2521+
('𐕱', &['𐖘']),
2522+
('𐕲', &['𐖙']),
2523+
('𐕳', &['𐖚']),
2524+
('𐕴', &['𐖛']),
2525+
('𐕵', &['𐖜']),
2526+
('𐕶', &['𐖝']),
2527+
('𐕷', &['𐖞']),
2528+
('𐕸', &['𐖟']),
2529+
('𐕹', &['𐖠']),
2530+
('𐕺', &['𐖡']),
2531+
('𐕼', &['𐖣']),
2532+
('𐕽', &['𐖤']),
2533+
('𐕾', &['𐖥']),
2534+
('𐕿', &['𐖦']),
2535+
('𐖀', &['𐖧']),
2536+
('𐖁', &['𐖨']),
2537+
('𐖂', &['𐖩']),
2538+
('𐖃', &['𐖪']),
2539+
('𐖄', &['𐖫']),
2540+
('𐖅', &['𐖬']),
2541+
('𐖆', &['𐖭']),
2542+
('𐖇', &['𐖮']),
2543+
('𐖈', &['𐖯']),
2544+
('𐖉', &['𐖰']),
2545+
('𐖊', &['𐖱']),
2546+
('𐖌', &['𐖳']),
2547+
('𐖍', &['𐖴']),
2548+
('𐖎', &['𐖵']),
2549+
('𐖏', &['𐖶']),
2550+
('𐖐', &['𐖷']),
2551+
('𐖑', &['𐖸']),
2552+
('𐖒', &['𐖹']),
2553+
('𐖔', &['𐖻']),
2554+
('𐖕', &['𐖼']),
2555+
('𐖗', &['𐕰']),
2556+
('𐖘', &['𐕱']),
2557+
('𐖙', &['𐕲']),
2558+
('𐖚', &['𐕳']),
2559+
('𐖛', &['𐕴']),
2560+
('𐖜', &['𐕵']),
2561+
('𐖝', &['𐕶']),
2562+
('𐖞', &['𐕷']),
2563+
('𐖟', &['𐕸']),
2564+
('𐖠', &['𐕹']),
2565+
('𐖡', &['𐕺']),
2566+
('𐖣', &['𐕼']),
2567+
('𐖤', &['𐕽']),
2568+
('𐖥', &['𐕾']),
2569+
('𐖦', &['𐕿']),
2570+
('𐖧', &['𐖀']),
2571+
('𐖨', &['𐖁']),
2572+
('𐖩', &['𐖂']),
2573+
('𐖪', &['𐖃']),
2574+
('𐖫', &['𐖄']),
2575+
('𐖬', &['𐖅']),
2576+
('𐖭', &['𐖆']),
2577+
('𐖮', &['𐖇']),
2578+
('𐖯', &['𐖈']),
2579+
('𐖰', &['𐖉']),
2580+
('𐖱', &['𐖊']),
2581+
('𐖳', &['𐖌']),
2582+
('𐖴', &['𐖍']),
2583+
('𐖵', &['𐖎']),
2584+
('𐖶', &['𐖏']),
2585+
('𐖷', &['𐖐']),
2586+
('𐖸', &['𐖑']),
2587+
('𐖹', &['𐖒']),
2588+
('𐖻', &['𐖔']),
2589+
('𐖼', &['𐖕']),
25102590
('𐲀', &['𐳀']),
25112591
('𐲁', &['𐳁']),
25122592
('𐲂', &['𐳂']),

0 commit comments

Comments
 (0)