|
13 | 13 | % Weights for unified Han characters follow the Unified Repertoire and |
14 | 14 | % Ordering, which is a language-neutral, traditional radical-stroke order. |
15 | 15 |
|
16 | | -% The original URO and Extensions A through I, plus the 12 unified Han characters |
| 16 | +% The original URO and Extensions A through J, plus the 12 unified Han characters |
17 | 17 | % in the CJK compatibility area are weighted implicitly as defined here. |
18 | 18 |
|
19 | 19 | % WEIGHT_BASE = 0xFB40 for original URO and 12 unified Han from CJK compat area. |
20 | 20 | % cp >= 0x04E00 && cp <= 0x09FFF % URO |
21 | 21 | % WEIGHT_BASE = 0xFB80 for Extension A through Extension J Han characters. |
22 | 22 | % cp >= 0x03400 && cp <= 0x04DBF % Ext. A |
23 | 23 | % cp >= 0x20000 && cp <= 0x2A6DF % Ext. B |
24 | | -% cp >= 0x2A700 && cp <= 0x2B739 % Ext. C |
| 24 | +% cp >= 0x2A700 && cp <= 0x2B73F % Ext. C |
25 | 25 | % cp >= 0x2B740 && cp <= 0x2B81D % Ext. D |
26 | | -% cp >= 0x2B820 && cp <= 0x2CEA1 % Ext. E |
| 26 | +% cp >= 0x2B820 && cp <= 0x2CEAD % Ext. E |
27 | 27 | % cp >= 0x2CEB0 && cp <= 0x2EBE0 % Ext. F |
28 | 28 | % cp >= 0x2EBF0 && cp <= 0x2EE5D % Ext. I |
29 | 29 | % cp >= 0x30000 && cp <= 0x3134A % Ext. G |
30 | 30 | % cp >= 0x31350 && cp <= 0x323AF % Ext. H |
| 31 | +% cp >= 0x323B0 && cp <= 0x33479 % Ext. J |
31 | 32 | % For a given Han character at code point cp: |
32 | 33 | % base1 = WEIGHT_BASE + ( cp >> 15 ) |
33 | 34 | % base2 = ( cp & 0x7FFF ) | 0x8000 |
34 | 35 | % Then weight the character as: <U{cp}> "<R{base1}><T{base2}>";<BASE>;<MIN>;<SFFFF> |
35 | 36 |
|
36 | | -% Tangut ideographic and component characters are weighted implicitly as defined here. |
| 37 | +% Tangut ideographic characters are weighted implicitly as defined here. |
37 | 38 |
|
38 | 39 | % WEIGHT_BASE = 0xFB00 |
39 | | -% cp >= 0x17000 && cp <= 0x187F7 % Tangut ideographs |
| 40 | +% cp >= 0x17000 && cp <= 0x187FF % Tangut ideographs |
| 41 | +% cp >= 0x18D00 && cp <= 0x18D1E % Tangut ideograph supplement |
| 42 | +% For a given Tangut character at code point cp: |
| 43 | +% base1 = WEIGHT_BASE |
| 44 | +% base2 = ( cp - 0x17000 ) | 0x8000 |
| 45 | +% Then weight the character as: <U{cp}> "<R{base1}><T{base2}>";<BASE>;<MIN>;<SFFFF> |
| 46 | + |
| 47 | +% Tangut component characters are weighted implicitly as defined here. |
| 48 | + |
| 49 | +% WEIGHT_BASE = 0xFB01 |
40 | 50 | % cp >= 0x18800 && cp <= 0x18AFF % Tangut components |
41 | | -% cp >= 0x18D00 && cp <= 0x18D08 % Tangut ideograph supplement |
| 51 | +% cp >= 0x18D80 && cp <= 0x18DFF % Tangut component supplement |
42 | 52 | % For a given Tangut component character at code point cp: |
43 | 53 | % base1 = WEIGHT_BASE |
44 | 54 | % base2 = ( cp - 0x17000 ) | 0x8000 |
45 | 55 | % Then weight the character as: <U{cp}> "<R{base1}><T{base2}>";<BASE>;<MIN>;<SFFFF> |
46 | 56 |
|
47 | 57 | % Nushu ideographic characters are weighted implicitly as defined here. |
48 | 58 |
|
49 | | -% WEIGHT_BASE = 0xFB01 |
| 59 | +% WEIGHT_BASE = 0xFB02 |
50 | 60 | % cp >= 0x1B170 && cp <= 0x1B2FB % Nushu |
51 | 61 | % For a given Nushu character at code point cp: |
52 | 62 | % base1 = WEIGHT_BASE |
|
55 | 65 |
|
56 | 66 | % Khitan Small Script ideographic characters are weighted implicitly as defined here. |
57 | 67 |
|
58 | | -% WEIGHT_BASE = 0xFB02 |
| 68 | +% WEIGHT_BASE = 0xFB03 |
59 | 69 | % cp >= 0x18B00 && cp <= 0x18CD5 % Khitan Small Script |
60 | 70 | % For a given Khitan Small Script character at code point cp: |
61 | 71 | % base1 = WEIGHT_BASE |
|
0 commit comments