Skip to content

Commit fa6185c

Browse files
committed
updated for version 7.3.253
Problem: "echo 'abc' > ''" returns 0 or 1, depending on 'ignorecase'. Checks in mb_strnicmp() for illegal and truncated bytes are wrong. Should not assume that byte length is equal before case folding. Solution: Add utf_safe_read_char_adv() and utf_strnicmp(). Add a test for this. (Ivan Krasilnikov)
1 parent c9cf249 commit fa6185c

File tree

8 files changed

+158
-46
lines changed

8 files changed

+158
-46
lines changed

src/mbyte.c

Lines changed: 147 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ static int utf_ptr2cells_len __ARGS((char_u *p, int size));
132132
static int dbcs_char2cells __ARGS((int c));
133133
static int dbcs_ptr2cells_len __ARGS((char_u *p, int size));
134134
static int dbcs_ptr2char __ARGS((char_u *p));
135+
static int utf_safe_read_char_adv __ARGS((char_u **s, size_t *n));
135136

136137
/*
137138
* Lookup table to quickly get the length in bytes of a UTF-8 character from
@@ -1700,6 +1701,66 @@ utf_ptr2char(p)
17001701
return p[0];
17011702
}
17021703

1704+
/*
1705+
* Convert a UTF-8 byte sequence to a wide character.
1706+
* String is assumed to be terminated by NUL or after "n" bytes, whichever
1707+
* comes first.
1708+
* The function is safe in the sense that it never accesses memory beyond the
1709+
* first "n" bytes of "s".
1710+
*
1711+
* On success, returns decoded codepoint, advances "s" to the beginning of
1712+
* next character and decreases "n" accordingly.
1713+
*
1714+
* If end of string was reached, returns 0 and, if "n" > 0, advances "s" past
1715+
* NUL byte.
1716+
*
1717+
* If byte sequence is illegal or incomplete, returns -1 and does not advance
1718+
* "s".
1719+
*/
1720+
static int
1721+
utf_safe_read_char_adv(s, n)
1722+
char_u **s;
1723+
size_t *n;
1724+
{
1725+
int c, k;
1726+
1727+
if (*n == 0) /* end of buffer */
1728+
return 0;
1729+
1730+
k = utf8len_tab_zero[**s];
1731+
1732+
if (k == 1)
1733+
{
1734+
/* ASCII character or NUL */
1735+
(*n)--;
1736+
return *(*s)++;
1737+
}
1738+
1739+
if ((size_t)k <= *n)
1740+
{
1741+
/* We have a multibyte sequence and it isn't truncated by buffer
1742+
* limits so utf_ptr2char() is safe to use. Or the first byte is
1743+
* illegal (k=0), and it's also safe to use utf_ptr2char(). */
1744+
c = utf_ptr2char(*s);
1745+
1746+
/* On failure, utf_ptr2char() returns the first byte, so here we
1747+
* check equality with the first byte. The only non-ASCII character
1748+
* which equals the first byte of its own UTF-8 representation is
1749+
* U+00C3 (UTF-8: 0xC3 0x83), so need to check that special case too.
1750+
* It's safe even if n=1, else we would have k=2 > n. */
1751+
if (c != (int)(**s) || (c == 0xC3 && (*s)[1] == 0x83))
1752+
{
1753+
/* byte sequence was successfully decoded */
1754+
*s += k;
1755+
*n -= k;
1756+
return c;
1757+
}
1758+
}
1759+
1760+
/* byte sequence is incomplete or illegal */
1761+
return -1;
1762+
}
1763+
17031764
/*
17041765
* Get character at **pp and advance *pp to the next character.
17051766
* Note: composing characters are skipped!
@@ -2667,7 +2728,8 @@ static convertStruct foldCase[] =
26672728
{0x10400,0x10427,1,40}
26682729
};
26692730

2670-
static int utf_convert(int a, convertStruct table[], int tableSize);
2731+
static int utf_convert __ARGS((int a, convertStruct table[], int tableSize));
2732+
static int utf_strnicmp __ARGS((char_u *s1, char_u *s2, size_t n1, size_t n2));
26712733

26722734
/*
26732735
* Generic conversion function for case operations.
@@ -3079,6 +3141,80 @@ utf_isupper(a)
30793141
return (utf_tolower(a) != a);
30803142
}
30813143

3144+
static int
3145+
utf_strnicmp(s1, s2, n1, n2)
3146+
char_u *s1, *s2;
3147+
size_t n1, n2;
3148+
{
3149+
int c1, c2, cdiff;
3150+
char_u buffer[6];
3151+
3152+
for (;;)
3153+
{
3154+
c1 = utf_safe_read_char_adv(&s1, &n1);
3155+
c2 = utf_safe_read_char_adv(&s2, &n2);
3156+
3157+
if (c1 <= 0 || c2 <= 0)
3158+
break;
3159+
3160+
if (c1 == c2)
3161+
continue;
3162+
3163+
cdiff = utf_fold(c1) - utf_fold(c2);
3164+
if (cdiff != 0)
3165+
return cdiff;
3166+
}
3167+
3168+
/* some string ended or has an incomplete/illegal character sequence */
3169+
3170+
if (c1 == 0 || c2 == 0)
3171+
{
3172+
/* some string ended. shorter string is smaller */
3173+
if (c1 == 0 && c2 == 0)
3174+
return 0;
3175+
return c1 == 0 ? -1 : 1;
3176+
}
3177+
3178+
/* Continue with bytewise comparison to produce some result that
3179+
* would make comparison operations involving this function transitive.
3180+
*
3181+
* If only one string had an error, comparison should be made with
3182+
* folded version of the other string. In this case it is enough
3183+
* to fold just one character to determine the result of comparison. */
3184+
3185+
if (c1 != -1 && c2 == -1)
3186+
{
3187+
n1 = utf_char2bytes(utf_fold(c1), buffer);
3188+
s1 = buffer;
3189+
}
3190+
else if (c2 != -1 && c1 == -1)
3191+
{
3192+
n2 = utf_char2bytes(utf_fold(c2), buffer);
3193+
s2 = buffer;
3194+
}
3195+
3196+
while (n1 > 0 && n2 > 0 && *s1 != NUL && *s2 != NUL)
3197+
{
3198+
cdiff = (int)(*s1) - (int)(*s2);
3199+
if (cdiff != 0)
3200+
return cdiff;
3201+
3202+
s1++;
3203+
s2++;
3204+
n1--;
3205+
n2--;
3206+
}
3207+
3208+
if (n1 > 0 && *s1 == NUL)
3209+
n1 = 0;
3210+
if (n2 > 0 && *s2 == NUL)
3211+
n2 = 0;
3212+
3213+
if (n1 == 0 && n2 == 0)
3214+
return 0;
3215+
return n1 == 0 ? -1 : 1;
3216+
}
3217+
30823218
/*
30833219
* Version of strnicmp() that handles multi-byte characters.
30843220
* Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can
@@ -3092,49 +3228,21 @@ mb_strnicmp(s1, s2, nn)
30923228
char_u *s1, *s2;
30933229
size_t nn;
30943230
{
3095-
int i, j, l;
3231+
int i, l;
30963232
int cdiff;
3097-
int incomplete = FALSE;
30983233
int n = (int)nn;
30993234

3100-
for (i = 0; i < n; i += l)
3235+
if (enc_utf8)
31013236
{
3102-
if (s1[i] == NUL && s2[i] == NUL) /* both strings end */
3103-
return 0;
3104-
if (enc_utf8)
3105-
{
3106-
l = utf_byte2len(s1[i]);
3107-
if (l > n - i)
3108-
{
3109-
l = n - i; /* incomplete character */
3110-
incomplete = TRUE;
3111-
}
3112-
/* Check directly first, it's faster. */
3113-
for (j = 0; j < l; ++j)
3114-
{
3115-
if (s1[i + j] != s2[i + j])
3116-
break;
3117-
if (s1[i + j] == 0)
3118-
/* Both stings have the same bytes but are incomplete or
3119-
* have illegal bytes, accept them as equal. */
3120-
l = j;
3121-
}
3122-
if (j < l)
3123-
{
3124-
/* If one of the two characters is incomplete return -1. */
3125-
if (incomplete || i + utf_byte2len(s2[i]) > n)
3126-
return -1;
3127-
/* Don't case-fold illegal bytes or truncated characters. */
3128-
if (utf_ptr2len(s1 + i) < l || utf_ptr2len(s2 + i) < l)
3129-
return -1;
3130-
cdiff = utf_fold(utf_ptr2char(s1 + i))
3131-
- utf_fold(utf_ptr2char(s2 + i));
3132-
if (cdiff != 0)
3133-
return cdiff;
3134-
}
3135-
}
3136-
else
3237+
return utf_strnicmp(s1, s2, nn, nn);
3238+
}
3239+
else
3240+
{
3241+
for (i = 0; i < n; i += l)
31373242
{
3243+
if (s1[i] == NUL && s2[i] == NUL) /* both strings end */
3244+
return 0;
3245+
31383246
l = (*mb_ptr2len)(s1 + i);
31393247
if (l <= 1)
31403248
{

src/testdir/Make_amiga.mak

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
2929
test66.out test67.out test68.out test69.out test70.out \
3030
test71.out test72.out test73.out test74.out test75.out \
3131
test76.out test77.out test78.out test79.out test80.out \
32-
test81.out
32+
test81.out test82.out
3333

3434
.SUFFIXES: .in .out
3535

@@ -130,3 +130,4 @@ test78.out: test78.in
130130
test79.out: test79.in
131131
test80.out: test80.in
132132
test81.out: test81.in
133+
test82.out: test82.in

src/testdir/Make_dos.mak

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
2929
test42.out test52.out test65.out test66.out test67.out \
3030
test68.out test69.out test71.out test72.out test73.out \
3131
test74.out test75.out test76.out test77.out test78.out \
32-
test79.out test80.out test81.out
32+
test79.out test80.out test81.out test82.out
3333

3434
SCRIPTS32 = test50.out test70.out
3535

src/testdir/Make_ming.mak

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ SCRIPTS = test3.out test4.out test5.out test6.out test7.out \
4949
test42.out test52.out test65.out test66.out test67.out \
5050
test68.out test69.out test71.out test72.out test73.out \
5151
test74.out test75.out test76.out test77.out test78.out \
52-
test79.out test80.out test81.out
52+
test79.out test80.out test81.out test82.out
5353

5454
SCRIPTS32 = test50.out test70.out
5555

src/testdir/Make_os2.mak

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
2929
test66.out test67.out test68.out test69.out test70.out \
3030
test71.out test72.out test73.out test74.out test75.out \
3131
test76.out test77.out test78.out test79.out test80.out \
32-
test81.out
32+
test81.out test82.out
3333

3434
.SUFFIXES: .in .out
3535

src/testdir/Make_vms.mms

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Authors: Zoltan Arpadffy, <[email protected]>
55
# Sandor Kopanyi, <[email protected]>
66
#
7-
# Last change: 2011 Jun 26
7+
# Last change: 2011 Jul 15
88
#
99
# This has been tested on VMS 6.2 to 8.3 on DEC Alpha, VAX and IA64.
1010
# Edit the lines in the Configuration section below to select.
@@ -75,7 +75,8 @@ SCRIPT = test1.out test2.out test3.out test4.out test5.out \
7575
test61.out test62.out test63.out test64.out test65.out \
7676
test66.out test67.out test68.out test69.out \
7777
test71.out test72.out test74.out test75.out test76.out \
78-
test77.out test78.out test79.out test80.out test81.out
78+
test77.out test78.out test79.out test80.out test81.out \
79+
test82.out
7980

8081
# Known problems:
8182
# Test 30: a problem around mac format - unknown reason

src/testdir/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ SCRIPTS = test1.out test2.out test3.out test4.out test5.out test6.out \
2626
test64.out test65.out test66.out test67.out test68.out \
2727
test69.out test70.out test71.out test72.out test73.out \
2828
test74.out test75.out test76.out test77.out test78.out \
29-
test79.out test80.out test81.out
29+
test79.out test80.out test81.out test82.out
3030

3131
SCRIPTS_GUI = test16.out
3232

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,8 @@ static char *(features[]) =
709709

710710
static int included_patches[] =
711711
{ /* Add new patch number below this line */
712+
/**/
713+
253,
712714
/**/
713715
252,
714716
/**/

0 commit comments

Comments
 (0)