Skip to content

Commit 9e3b65d

Browse files
committed
stdlib: Check for illegal surrogates when wchar_t is 2 bytes
Handling 2-byte wchar_t values requires extra care to avoid illegal surrogate sequences, both in data passed into the library and in data it produces. Make sure a high surrogate is always followed by a low surrogate and that a low surrogate never stands alone. When parsing UTF-8 into 2-byte wchar_t, make sure the decoded value is not itself a surrogate code point (0xd800-0xdfff). Signed-off-by: Keith Packard <[email protected]>
1 parent 00baa82 commit 9e3b65d

File tree

2 files changed

+21
-1
lines changed

2 files changed

+21
-1
lines changed

newlib/libc/stdlib/mbtowc_r.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,12 @@ __utf8_mbtowc (
629629
tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
630630
| (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
631631
| (wchar_t)(ch & 0x3f);
632+
/* Check for surrogates */
633+
if (0xd800 <= tmp && tmp <= 0xdfff)
634+
{
635+
_REENT_ERRNO(r) = EILSEQ;
636+
return -1;
637+
}
632638
*pwc = tmp;
633639
return i;
634640
}

newlib/libc/stdlib/wctomb_r.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ __utf8_wctomb (
5555
if (sizeof (wchar_t) == 2 && state->__count == -4
5656
&& (wchar < 0xdc00 || wchar > 0xdfff))
5757
{
58+
/* Unexpected extra high surrogate */
59+
if (0xd800 <= wchar && wchar <= 0xdbff)
60+
{
61+
_REENT_ERRNO(r) = EILSEQ;
62+
return -1;
63+
}
5864
/* There's a leftover lone high surrogate. Write out the CESU-8 value
5965
of the surrogate and proceed to convert the given character. Note
6066
to return extra 3 bytes. */
@@ -86,6 +92,12 @@ __utf8_wctomb (
8692
uint32_t tmp;
8793
if (wchar <= 0xdbff)
8894
{
95+
if (state->__count == -4)
96+
{
97+
/* Extra high surrogate */
98+
_REENT_ERRNO(r) = EILSEQ;
99+
return -1;
100+
}
89101
/* First half of a surrogate pair. Store the state and
90102
return ret + 0. */
91103
tmp = ((wchar & 0x3ff) << 10) + 0x10000;
@@ -110,7 +122,9 @@ __utf8_wctomb (
110122
*s = 0x80 | (tmp & 0x3f);
111123
return 4;
112124
}
113-
/* Otherwise translate into CESU-8 value. */
125+
/* Unexpected second half */
126+
_REENT_ERRNO(r) = EILSEQ;
127+
return -1;
114128
}
115129
*s++ = 0xe0 | ((wchar & 0xf000) >> 12);
116130
*s++ = 0x80 | ((wchar & 0xfc0) >> 6);

0 commit comments

Comments
 (0)