Skip to content

Commit 4b75bd3

Browse files
nivedita76 authored and ardbiesheuvel committed
efi/libstub: Add UTF-8 decoding to efi_puts
In order to be able to use the UTF-16 support added to vsprintf in the previous commit, enhance efi_puts to decode UTF-8 into UTF-16. Invalid UTF-8 encodings are passed through unchanged. Signed-off-by: Arvind Sankar <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Ard Biesheuvel <[email protected]>
1 parent d850a2f commit 4b75bd3

File tree

1 file changed

+62
-5
lines changed

1 file changed

+62
-5
lines changed

drivers/firmware/efi/libstub/efi-stub-helper.c

Lines changed: 62 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -36,17 +36,74 @@ void efi_char16_puts(efi_char16_t *str)
3636
output_string, str);
3737
}
3838

39+
/*
 * utf8_to_utf32() - decode one UTF-8 sequence from a NUL-terminated string.
 * @s8: in/out cursor into the string; *s8 must point at a readable octet.
 *
 * On a well-formed sequence the decoded code point is returned and *s8 is
 * advanced past the whole sequence.
 *
 * On anything else (a plain single octet, a stray continuation octet, a bad
 * or truncated trailing octet, an overlong form, a surrogate, or a value
 * above U+10FFFF) the first octet is returned unchanged and *s8 is advanced
 * by exactly one, so invalid input is passed through octet by octet.
 *
 * A NUL octet is never accepted as a trailing octet, so decoding stops at
 * the string terminator and does not look past it.
 */
static u32 utf8_to_utf32(const u8 **s8)
{
	u32 c32;
	u8 c0, cx;
	size_t clen, i;

	c0 = cx = *(*s8)++;
	/*
	 * The position of the most-significant 0 bit gives us the length of
	 * a multi-octet encoding. cx is a u8, so the shifted-out bits are
	 * dropped and the loop ends after at most 8 iterations.
	 */
	for (clen = 0; cx & 0x80; ++clen)
		cx <<= 1;
	/*
	 * If the 0 bit is in position 8, this is a valid single-octet
	 * encoding. If the 0 bit is in position 7 or positions 1-3, the
	 * encoding is invalid.
	 * In either case, we just return the first octet.
	 */
	if (clen < 2 || clen > 4)
		return c0;
	/*
	 * Get the bits from the first octet: the length prefix has been
	 * shifted out of cx above, so shifting back leaves only the payload.
	 */
	c32 = cx >> clen;
	/* From here on clen counts the trailing octets only. */
	clen--;
	for (i = 0; i < clen; ++i) {
		/*
		 * Trailing octets must have 10 in most significant bits.
		 * After flipping bit 7, a valid octet has both top bits clear.
		 */
		cx = (*s8)[i] ^ 0x80;
		if (cx & 0xc0)
			return c0;
		c32 = (c32 << 6) | cx;
	}
	/*
	 * Check for validity:
	 * - The character must be in the Unicode range.
	 * - It must not be a surrogate. The full value is compared: masking
	 *   only the low 16 bits would also reject valid supplementary-plane
	 *   characters such as U+1D800.
	 * - It must be encoded using the correct number of octets.
	 */
	if (c32 > 0x10ffff ||
	    (c32 >= 0xd800 && c32 <= 0xdfff) ||
	    clen != (c32 >= 0x80) + (c32 >= 0x800) + (c32 >= 0x10000))
		return c0;
	*s8 += clen;
	return c32;
}
83+
3984
void efi_puts(const char *str)
4085
{
4186
efi_char16_t buf[128];
4287
size_t pos = 0, lim = ARRAY_SIZE(buf);
88+
const u8 *s8 = (const u8 *)str;
89+
u32 c32;
4390

44-
while (*str) {
45-
if (*str == '\n')
91+
while (*s8) {
92+
if (*s8 == '\n')
4693
buf[pos++] = L'\r';
47-
/* Cast to unsigned char to avoid sign-extension */
48-
buf[pos++] = (unsigned char)(*str++);
49-
if (*str == '\0' || pos >= lim - 2) {
94+
c32 = utf8_to_utf32(&s8);
95+
if (c32 < 0x10000) {
96+
/* Characters in plane 0 use a single word. */
97+
buf[pos++] = c32;
98+
} else {
99+
/*
100+
* Characters in other planes encode into a surrogate
101+
* pair.
102+
*/
103+
buf[pos++] = (0xd800 - (0x10000 >> 10)) + (c32 >> 10);
104+
buf[pos++] = 0xdc00 + (c32 & 0x3ff);
105+
}
106+
if (*s8 == '\0' || pos >= lim - 2) {
50107
buf[pos] = L'\0';
51108
efi_char16_puts(buf);
52109
pos = 0;

0 commit comments

Comments
 (0)