Skip to content

Commit 334d40e

Browse files
committed
Merge branch 'tb/unicode-6.3-zero-width'
Update the logic to compute the display width needed for utf8 strings and allow us to more easily maintain the tables used in that logic. We may want to let the users choose if codepoints with ambiguous widths are treated as double or single width in a follow-up patch.

* tb/unicode-6.3-zero-width:
  utf8: make it easier to auto-update git_wcwidth()
  utf8.c: use a table for double_width
2 parents a046013 + 9c94389 commit 334d40e

File tree

5 files changed

+334
-69
lines changed

5 files changed

+334
-69
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@
226226
/config.mak.autogen
227227
/config.mak.append
228228
/configure
229+
/unicode
229230
/tags
230231
/TAGS
231232
/cscope*

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,7 @@ LIB_H += transport.h
729729
LIB_H += tree-walk.h
730730
LIB_H += tree.h
731731
LIB_H += unpack-trees.h
732+
LIB_H += unicode_width.h
732733
LIB_H += url.h
733734
LIB_H += urlmatch.h
734735
LIB_H += userdiff.h

unicode_width.h

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
/*
 * Code point ranges that occupy no display column.
 *
 * Each entry is an inclusive { low, high } range; the entries are in
 * ascending order and do not overlap.  The contents are produced by
 * update_unicode.sh from the Unicode general categories Me, Mn and Cf,
 * plus U+1160..U+11FF, minus U+00AD.
 */
static const struct interval zero_width[] = {
	{ 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, { 0x05BF, 0x05BF },
	{ 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0604 },
	{ 0x0610, 0x061A }, { 0x061C, 0x061C }, { 0x064B, 0x065F }, { 0x0670, 0x0670 },
	{ 0x06D6, 0x06DD }, { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
	{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
	{ 0x07EB, 0x07F3 }, { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 },
	{ 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08E4, 0x08FE }, { 0x0900, 0x0902 },
	{ 0x093A, 0x093A }, { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
	{ 0x0951, 0x0957 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC },
	{ 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 },
	{ 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
	{ 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 },
	{ 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
	{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F },
	{ 0x0B41, 0x0B44 }, { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 },
	{ 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 },
	{ 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 },
	{ 0x0CBC, 0x0CBC }, { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
	{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, { 0x0D62, 0x0D63 },
	{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 },
	{ 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
	{ 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
	{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 },
	{ 0x0F86, 0x0F87 }, { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
	{ 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, { 0x103D, 0x103E },
	{ 0x1058, 0x1059 }, { 0x105E, 0x1060 }, { 0x1071, 0x1074 }, { 0x1082, 0x1082 },
	{ 0x1085, 0x1086 }, { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x1160, 0x11FF },
	{ 0x135D, 0x135F }, { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
	{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 },
	{ 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, { 0x180B, 0x180E }, { 0x18A9, 0x18A9 },
	{ 0x1920, 0x1922 }, { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
	{ 0x1A17, 0x1A18 }, { 0x1A1B, 0x1A1B }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E },
	{ 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, { 0x1A73, 0x1A7C },
	{ 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A },
	{ 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 },
	{ 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB }, { 0x1BE6, 0x1BE6 },
	{ 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED }, { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 },
	{ 0x1C36, 0x1C37 }, { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 },
	{ 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 }, { 0x1DFC, 0x1DFF },
	{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2064 }, { 0x2066, 0x206F },
	{ 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 }, { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF },
	{ 0x302A, 0x302D }, { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D },
	{ 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, { 0xA806, 0xA806 },
	{ 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 },
	{ 0xA926, 0xA92D }, { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 },
	{ 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E }, { 0xAA31, 0xAA32 },
	{ 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 },
	{ 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 },
	{ 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 }, { 0xABE8, 0xABE8 },
	{ 0xABED, 0xABED }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 },
	{ 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, { 0x101FD, 0x101FD }, { 0x10A01, 0x10A03 },
	{ 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F },
	{ 0x11001, 0x11001 }, { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 },
	{ 0x110B9, 0x110BA }, { 0x110BD, 0x110BD }, { 0x11100, 0x11102 }, { 0x11127, 0x1112B },
	{ 0x1112D, 0x11134 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE }, { 0x116AB, 0x116AB },
	{ 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 }, { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 },
	{ 0x1D167, 0x1D169 }, { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
	{ 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, { 0xE0100, 0xE01EF }
};
235+
/*
 * Code point ranges that occupy two display columns (East Asian Width
 * property F or W, as selected by "eaw:F,W" in update_unicode.sh).
 *
 * Each entry is an inclusive { low, high } range; the entries are in
 * ascending order and do not overlap.
 *
 * uniset's "--32" output starts with 17 "plane" index rows.  Those are
 * not code point ranges: kept in the table they would declare
 * U+0000..U+0023 double-width and break the ascending order of the
 * entries, so they must be stripped here just as they are for
 * zero_width[].
 */
static const struct interval double_width[] = {
{ 0x1100, 0x115F },
{ 0x2329, 0x232A },
{ 0x2E80, 0x2E99 },
{ 0x2E9B, 0x2EF3 },
{ 0x2F00, 0x2FD5 },
{ 0x2FF0, 0x2FFB },
{ 0x3000, 0x303E },
{ 0x3041, 0x3096 },
{ 0x3099, 0x30FF },
{ 0x3105, 0x312D },
{ 0x3131, 0x318E },
{ 0x3190, 0x31BA },
{ 0x31C0, 0x31E3 },
{ 0x31F0, 0x321E },
{ 0x3220, 0x3247 },
{ 0x3250, 0x32FE },
{ 0x3300, 0x4DBF },
{ 0x4E00, 0xA48C },
{ 0xA490, 0xA4C6 },
{ 0xA960, 0xA97C },
{ 0xAC00, 0xD7A3 },
{ 0xF900, 0xFAFF },
{ 0xFE10, 0xFE19 },
{ 0xFE30, 0xFE52 },
{ 0xFE54, 0xFE66 },
{ 0xFE68, 0xFE6B },
{ 0xFF01, 0xFF60 },
{ 0xFFE0, 0xFFE6 },
{ 0x1B000, 0x1B001 },
{ 0x1F200, 0x1F202 },
{ 0x1F210, 0x1F23A },
{ 0x1F240, 0x1F248 },
{ 0x1F250, 0x1F251 },
{ 0x20000, 0x2FFFD },
{ 0x30000, 0x3FFFD }
};

update_unicode.sh

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/sh
# Regenerate unicode_width.h (the zero_width[] and double_width[] tables)
# from the Unicode Character Database, using the "uniset" tool.
#
# Work files (UnicodeData.txt, EastAsianWidth.txt, the uniset checkout)
# are kept in ./unicode and are only fetched or built when missing.
#
#See http://www.unicode.org/reports/tr44/
#
#Me Enclosing_Mark  an enclosing combining mark
#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
#Cf Format          a format control character
#
# Relative to the unicode/ directory that the subshell below enters.
UNICODEWIDTH_H=../unicode_width.h
if ! test -d unicode; then
	mkdir unicode
fi &&
( cd unicode &&
	if ! test -f UnicodeData.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
	fi &&
	if ! test -f EastAsianWidth.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
	fi &&
	if ! test -d uniset; then
		git clone https://github.com/depp/uniset.git
	fi &&
	(
		cd uniset &&
		if ! test -x uniset; then
			autoreconf -i &&
			./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
		fi &&
		make
	) &&
	# "uniset --32" prefixes its output with "/* plane */" index rows.
	# They are not code point ranges, so both tables must have them
	# filtered out before the output is used as a list of intervals.
	echo "static const struct interval zero_width[] = {" >"$UNICODEWIDTH_H" &&
	UNICODE_DIR=. ./uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
	grep -v plane >>"$UNICODEWIDTH_H" &&
	echo "};" >>"$UNICODEWIDTH_H" &&
	echo "static const struct interval double_width[] = {" >>"$UNICODEWIDTH_H" &&
	UNICODE_DIR=. ./uniset/uniset --32 eaw:F,W |
	grep -v plane >>"$UNICODEWIDTH_H" &&
	echo "};" >>"$UNICODEWIDTH_H"
)

0 commit comments

Comments
 (0)