Skip to content

Commit f474e55

Browse files
committed
Refactor UTF-16LE -> wchar conversion code
1 parent 3f1851d commit f474e55

File tree

1 file changed

+30
-21
lines changed

1 file changed

+30
-21
lines changed

ext/mbstring/libmbfl/filters/mbfilter_utf16.c

Lines changed: 30 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -258,33 +258,42 @@ int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
258258
*/
259259
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
260260
{
261-
int n;
262-
263261
switch (filter->status) {
264262
case 0:
263+
filter->cache = c & 0xff;
265264
filter->status = 1;
266-
n = c & 0xff;
267-
filter->cache |= n;
268265
break;
269-
default:
270-
filter->status = 0;
271-
n = (filter->cache & 0xff) | ((c & 0xff) << 8);
272-
if (n >= 0xd800 && n < 0xdc00) {
273-
filter->cache = ((n & 0x3ff) << 16) + 0x400000;
274-
} else if (n >= 0xdc00 && n < 0xe000) {
275-
n &= 0x3ff;
276-
n |= (filter->cache & 0xfff0000) >> 6;
277-
filter->cache = 0;
278-
if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
279-
CK((*filter->output_function)(n, filter->data));
280-
} else { /* illegal character */
281-
n &= MBFL_WCSGROUP_MASK;
282-
n |= MBFL_WCSGROUP_THROUGH;
283-
CK((*filter->output_function)(n, filter->data));
284-
}
266+
267+
case 1:
268+
if ((c & 0xfc) == 0xd8) {
269+
/* Looks like we have a surrogate pair here */
270+
filter->cache += ((c & 0x3) << 8);
271+
filter->status = 2;
272+
} else if ((c & 0xfc) == 0xdc) {
273+
/* This is wrong; the second part of the surrogate pair has come first
274+
* Flag it with `MBFL_WCSGROUP_THROUGH`; the following filter will handle
275+
* the error */
276+
int n = (filter->cache + ((c & 0xff) << 8)) | MBFL_WCSGROUP_THROUGH;
277+
filter->status = 0;
278+
CK((*filter->output_function)(n, filter->data));
285279
} else {
286-
filter->cache = 0;
280+
filter->status = 0;
281+
CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
282+
}
283+
break;
284+
285+
case 2:
286+
filter->cache = (filter->cache << 10) + (c & 0xff);
287+
filter->status = 3;
288+
break;
289+
290+
case 3:
291+
filter->status = 0;
292+
int n = filter->cache + ((c & 0x3) << 8) + 0x10000;
293+
if (n >= MBFL_WCSPLANE_SUPMIN && n < MBFL_WCSPLANE_SUPMAX) {
287294
CK((*filter->output_function)(n, filter->data));
295+
} else { /* illegal character */
296+
CK((*filter->output_function)((n & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH, filter->data));
288297
}
289298
break;
290299
}

0 commit comments

Comments
 (0)