Skip to content

Commit d850a2f

Browse files
nivedita76 authored and ardbiesheuvel committed
efi/printf: Add support for wchar_t (UTF-16)
Support %lc and %ls to output UTF-16 strings (converted to UTF-8). Signed-off-by: Arvind Sankar <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Ard Biesheuvel <[email protected]>
1 parent 14c574f commit d850a2f

File tree

1 file changed

+106
-5
lines changed

1 file changed

+106
-5
lines changed

drivers/firmware/efi/libstub/vsprintf.c

Lines changed: 106 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ char *number(char *end, unsigned long long num, int base, char locase)
147147
#define LEFT 16 /* left justified */
148148
#define SMALL 32 /* Must be 32 == 0x20 */
149149
#define SPECIAL 64 /* 0x */
150+
#define WIDE 128 /* UTF-16 string */
150151

151152
static
152153
int get_flags(const char **fmt)
@@ -238,6 +239,58 @@ char get_sign(long long *num, int flags)
238239
return 0;
239240
}
240241

242+
static
243+
size_t utf16s_utf8nlen(const u16 *s16, size_t maxlen)
244+
{
245+
size_t len, clen;
246+
247+
for (len = 0; len < maxlen && *s16; len += clen) {
248+
u16 c0 = *s16++;
249+
250+
/* First, get the length for a BMP character */
251+
clen = 1 + (c0 >= 0x80) + (c0 >= 0x800);
252+
if (len + clen > maxlen)
253+
break;
254+
/*
255+
* If this is a high surrogate, and we're already at maxlen, we
256+
* can't include the character if it's a valid surrogate pair.
257+
* Avoid accessing one extra word just to check if it's valid
258+
* or not.
259+
*/
260+
if ((c0 & 0xfc00) == 0xd800) {
261+
if (len + clen == maxlen)
262+
break;
263+
if ((*s16 & 0xfc00) == 0xdc00) {
264+
++s16;
265+
++clen;
266+
}
267+
}
268+
}
269+
270+
return len;
271+
}
272+
273+
static
274+
u32 utf16_to_utf32(const u16 **s16)
275+
{
276+
u16 c0, c1;
277+
278+
c0 = *(*s16)++;
279+
/* not a surrogate */
280+
if ((c0 & 0xf800) != 0xd800)
281+
return c0;
282+
/* invalid: low surrogate instead of high */
283+
if (c0 & 0x0400)
284+
return 0xfffd;
285+
c1 = **s16;
286+
/* invalid: missing low surrogate */
287+
if ((c1 & 0xfc00) != 0xdc00)
288+
return 0xfffd;
289+
/* valid surrogate pair */
290+
++(*s16);
291+
return (0x10000 - (0xd800 << 10) - 0xdc00) + (c0 << 10) + c1;
292+
}
293+
241294
#define PUTC(c) \
242295
do { \
243296
if (pos < size) \
@@ -325,18 +378,31 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list ap)
325378
switch (*fmt) {
326379
case 'c':
327380
flags &= LEFT;
328-
tmp[0] = (unsigned char)va_arg(args, int);
329381
s = tmp;
330-
precision = len = 1;
382+
if (qualifier == 'l') {
383+
((u16 *)tmp)[0] = (u16)va_arg(args, unsigned int);
384+
((u16 *)tmp)[1] = L'\0';
385+
precision = INT_MAX;
386+
goto wstring;
387+
} else {
388+
tmp[0] = (unsigned char)va_arg(args, int);
389+
precision = len = 1;
390+
}
331391
goto output;
332392

333393
case 's':
334394
flags &= LEFT;
335395
if (precision < 0)
336396
precision = INT_MAX;
337-
s = va_arg(args, char *);
397+
s = va_arg(args, void *);
338398
if (!s)
339399
s = precision < 6 ? "" : "(null)";
400+
else if (qualifier == 'l') {
401+
wstring:
402+
flags |= WIDE;
403+
precision = len = utf16s_utf8nlen((const u16 *)s, precision);
404+
goto output;
405+
}
340406
precision = len = strnlen(s, precision);
341407
goto output;
342408

@@ -436,8 +502,43 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list ap)
436502
while (precision-- > len)
437503
PUTC('0');
438504
/* Actual output */
439-
while (len-- > 0)
440-
PUTC(*s++);
505+
if (flags & WIDE) {
506+
const u16 *ws = (const u16 *)s;
507+
508+
while (len-- > 0) {
509+
u32 c32 = utf16_to_utf32(&ws);
510+
u8 *s8;
511+
size_t clen;
512+
513+
if (c32 < 0x80) {
514+
PUTC(c32);
515+
continue;
516+
}
517+
518+
/* Number of trailing octets */
519+
clen = 1 + (c32 >= 0x800) + (c32 >= 0x10000);
520+
521+
len -= clen;
522+
s8 = (u8 *)&buf[pos];
523+
524+
/* Avoid writing partial character */
525+
PUTC('\0');
526+
pos += clen;
527+
if (pos >= size)
528+
continue;
529+
530+
/* Set high bits of leading octet */
531+
*s8 = (0xf00 >> 1) >> clen;
532+
/* Write trailing octets in reverse order */
533+
for (s8 += clen; clen; --clen, c32 >>= 6)
534+
*s8-- = 0x80 | (c32 & 0x3f);
535+
/* Set low bits of leading octet */
536+
*s8 |= c32;
537+
}
538+
} else {
539+
while (len-- > 0)
540+
PUTC(*s++);
541+
}
441542
/* Trailing padding with ' ' */
442543
while (field_width-- > 0)
443544
PUTC(' ');

0 commit comments

Comments
 (0)