Skip to content

Commit cc5b22b

Browse files
committed
patch 8.0.0243: tolower() does not work if the byte count changes
Problem: When making a character lower case with tolower() changes the byte count, the character is not made lower case. Solution: Add strlow_save(). (Dominique Pelle, closes #1406)
1 parent 65c836e commit cc5b22b

File tree

5 files changed

+216
-33
lines changed

5 files changed

+216
-33
lines changed

src/evalfunc.c

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12503,39 +12503,8 @@ f_timer_stopall(typval_T *argvars UNUSED, typval_T *rettv UNUSED)
1250312503
static void
1250412504
f_tolower(typval_T *argvars, typval_T *rettv)
1250512505
{
12506-
char_u *p;
12507-
12508-
p = vim_strsave(get_tv_string(&argvars[0]));
1250912506
rettv->v_type = VAR_STRING;
12510-
rettv->vval.v_string = p;
12511-
12512-
if (p != NULL)
12513-
while (*p != NUL)
12514-
{
12515-
#ifdef FEAT_MBYTE
12516-
int l;
12517-
12518-
if (enc_utf8)
12519-
{
12520-
int c, lc;
12521-
12522-
c = utf_ptr2char(p);
12523-
lc = utf_tolower(c);
12524-
l = utf_ptr2len(p);
12525-
/* TODO: reallocate string when byte count changes. */
12526-
if (utf_char2len(lc) == l)
12527-
utf_char2bytes(lc, p);
12528-
p += l;
12529-
}
12530-
else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
12531-
p += l; /* skip multi-byte character */
12532-
else
12533-
#endif
12534-
{
12535-
*p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
12536-
++p;
12537-
}
12538-
}
12507+
rettv->vval.v_string = strlow_save(get_tv_string(&argvars[0]));
1253912508
}
1254012509

1254112510
/*

src/misc2.c

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1602,7 +1602,10 @@ strup_save(char_u *orig)
16021602
{
16031603
s = alloc((unsigned)STRLEN(res) + 1 + newl - l);
16041604
if (s == NULL)
1605-
break;
1605+
{
1606+
vim_free(res);
1607+
return NULL;
1608+
}
16061609
mch_memmove(s, res, p - res);
16071610
STRCPY(s + (p - res) + newl, p + l);
16081611
p = s + (p - res);
@@ -1625,6 +1628,69 @@ strup_save(char_u *orig)
16251628

16261629
return res;
16271630
}
1631+
1632+
/*
1633+
* Make string "s" all lower-case and return it in allocated memory.
1634+
* Handles multi-byte characters as well as possible.
1635+
* Returns NULL when out of memory.
1636+
*/
1637+
char_u *
1638+
strlow_save(char_u *orig)
1639+
{
1640+
char_u *p;
1641+
char_u *res;
1642+
1643+
res = p = vim_strsave(orig);
1644+
1645+
if (res != NULL)
1646+
while (*p != NUL)
1647+
{
1648+
# ifdef FEAT_MBYTE
1649+
int l;
1650+
1651+
if (enc_utf8)
1652+
{
1653+
int c, lc;
1654+
int newl;
1655+
char_u *s;
1656+
1657+
c = utf_ptr2char(p);
1658+
lc = utf_tolower(c);
1659+
1660+
/* Reallocate string when byte count changes. This is rare,
1661+
* thus it's OK to do another malloc()/free(). */
1662+
l = utf_ptr2len(p);
1663+
newl = utf_char2len(lc);
1664+
if (newl != l)
1665+
{
1666+
s = alloc((unsigned)STRLEN(res) + 1 + newl - l);
1667+
if (s == NULL)
1668+
{
1669+
vim_free(res);
1670+
return NULL;
1671+
}
1672+
mch_memmove(s, res, p - res);
1673+
STRCPY(s + (p - res) + newl, p + l);
1674+
p = s + (p - res);
1675+
vim_free(res);
1676+
res = s;
1677+
}
1678+
1679+
utf_char2bytes(lc, p);
1680+
p += newl;
1681+
}
1682+
else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
1683+
p += l; /* skip multi-byte character */
1684+
else
1685+
# endif
1686+
{
1687+
*p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
1688+
p++;
1689+
}
1690+
}
1691+
1692+
return res;
1693+
}
16281694
#endif
16291695

16301696
/*

src/proto/misc2.pro

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ char_u *vim_strsave_up(char_u *string);
4040
char_u *vim_strnsave_up(char_u *string, int len);
4141
void vim_strup(char_u *p);
4242
char_u *strup_save(char_u *orig);
43+
char_u *strlow_save(char_u *orig);
4344
void del_trailing_spaces(char_u *ptr);
4445
void vim_strncpy(char_u *to, char_u *from, size_t len);
4546
void vim_strcat(char_u *to, char_u *from, size_t tosize);

src/testdir/test_functions.vim

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,148 @@ func Test_str2nr()
1616
call assert_equal(123456789, str2nr('123456789'))
1717
call assert_equal(-123456789, str2nr('-123456789'))
1818
endfunc
19+
20+
func Test_tolower()
21+
call assert_equal("", tolower(""))
22+
23+
" Test with all printable ASCII characters.
24+
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
25+
\ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
26+
27+
if !has('multi_byte')
28+
return
29+
endif
30+
31+
" Test with a few uppercase diacritics.
32+
call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
33+
call assert_equal("bḃḇ", tolower("BḂḆ"))
34+
call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
35+
call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
36+
call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
37+
call assert_equal("fḟ ", tolower("FḞ "))
38+
call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
39+
call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
40+
call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
41+
call assert_equal("", tolower(""))
42+
call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
43+
call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
44+
call assert_equal("mḿṁ", tolower("MḾṀ"))
45+
call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
46+
call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
47+
call assert_equal("pṕṗ", tolower("PṔṖ"))
48+
call assert_equal("q", tolower("Q"))
49+
call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
50+
call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
51+
call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
52+
call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
53+
call assert_equal("vṽ", tolower("VṼ"))
54+
call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
55+
call assert_equal("xẋẍ", tolower("XẊẌ"))
56+
call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
57+
call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
58+
59+
" Test with a few lowercase diacritics, which should remain unchanged.
60+
call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
61+
call assert_equal("bḃḇ", tolower("bḃḇ"))
62+
call assert_equal("cçćĉċč", tolower("cçćĉċč"))
63+
call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
64+
call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
65+
call assert_equal("fḟ", tolower("fḟ"))
66+
call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
67+
call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
68+
call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
69+
call assert_equal("jĵǰ", tolower("jĵǰ"))
70+
call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
71+
call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
72+
call assert_equal("mḿṁ ", tolower("mḿṁ "))
73+
call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ"))
74+
call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
75+
call assert_equal("pṕṗ", tolower("pṕṗ"))
76+
call assert_equal("q", tolower("q"))
77+
call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
78+
call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
79+
call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
80+
call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
81+
call assert_equal("vṽ", tolower("vṽ"))
82+
call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
83+
call assert_equal("ẋẍ", tolower("ẋẍ"))
84+
call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
85+
call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
86+
87+
" According to https://twitter.com/jifa/status/625776454479970304
88+
" Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
89+
" in length (2 to 3 bytes) when lowercased. So let's test them.
90+
call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
91+
endfunc
92+
93+
func Test_toupper()
94+
call assert_equal("", toupper(""))
95+
96+
" Test with all printable ASCII characters.
97+
call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
98+
\ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
99+
100+
if !has('multi_byte')
101+
return
102+
endif
103+
104+
" Test with a few lowercase diacritics.
105+
call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
106+
call assert_equal("BḂḆ", toupper("bḃḇ"))
107+
call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
108+
call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
109+
call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
110+
call assert_equal("FḞ", toupper("fḟ"))
111+
call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
112+
call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
113+
call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
114+
call assert_equal("JĴǰ", toupper("jĵǰ"))
115+
call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
116+
call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
117+
call assert_equal("MḾṀ ", toupper("mḿṁ "))
118+
call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ"))
119+
call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
120+
call assert_equal("PṔṖ", toupper("pṕṗ"))
121+
call assert_equal("Q", toupper("q"))
122+
call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
123+
call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
124+
call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
125+
call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
126+
call assert_equal("VṼ", toupper("vṽ"))
127+
call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
128+
call assert_equal("ẊẌ", toupper("ẋẍ"))
129+
call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
130+
call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
131+
132+
" Test that uppercase diacritics, which should remain unchanged.
133+
call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
134+
call assert_equal("BḂḆ", toupper("BḂḆ"))
135+
call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
136+
call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
137+
call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
138+
call assert_equal("FḞ ", toupper("FḞ "))
139+
call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
140+
call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
141+
call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
142+
call assert_equal("", toupper(""))
143+
call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
144+
call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
145+
call assert_equal("MḾṀ", toupper("MḾṀ"))
146+
call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
147+
call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
148+
call assert_equal("PṔṖ", toupper("PṔṖ"))
149+
call assert_equal("Q", toupper("Q"))
150+
call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
151+
call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
152+
call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
153+
call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
154+
call assert_equal("VṼ", toupper("VṼ"))
155+
call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
156+
call assert_equal("XẊẌ", toupper("XẊẌ"))
157+
call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
158+
call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
159+
160+
call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
161+
endfunc
162+
163+

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,8 @@ static char *(features[]) =
764764

765765
static int included_patches[] =
766766
{ /* Add new patch number below this line */
767+
/**/
768+
243,
767769
/**/
768770
242,
769771
/**/

0 commit comments

Comments
 (0)