@@ -147,6 +147,7 @@ char *number(char *end, unsigned long long num, int base, char locase)
147
147
#define LEFT 16 /* left justified */
148
148
#define SMALL 32 /* Must be 32 == 0x20 */
149
149
#define SPECIAL 64 /* 0x */
150
+ #define WIDE 128 /* UTF-16 string: argument is u16 data, emitted as UTF-8 */
150
151
151
152
static
152
153
int get_flags (const char * * fmt )
@@ -238,6 +239,58 @@ char get_sign(long long *num, int flags)
238
239
return 0 ;
239
240
}
240
241
242
/*
 * utf16s_utf8nlen() - size in bytes of a UTF-16 string once encoded as UTF-8
 * @s16:    NUL-terminated string of UTF-16 code units
 * @maxlen: upper bound on the number of UTF-8 bytes to account for
 *
 * Walks @s16 and adds up the UTF-8 length of each character, stopping at the
 * terminating NUL or as soon as the next character would not fit entirely
 * within @maxlen bytes. A high surrogate followed by a low surrogate counts
 * as one 4-byte character; any other surrogate counts as 3 bytes.
 *
 * Returns the accumulated byte count, which never exceeds @maxlen.
 */
static
size_t utf16s_utf8nlen(const u16 *s16, size_t maxlen)
{
	size_t total = 0;

	while (total < maxlen && *s16 != 0) {
		const u16 unit = *s16++;
		size_t nbytes;

		/* Size the code unit as if it were a plain BMP character. */
		if (unit < 0x80)
			nbytes = 1;
		else if (unit < 0x800)
			nbytes = 2;
		else
			nbytes = 3;

		if (total + nbytes > maxlen)
			break;

		if ((unit & 0xfc00) == 0xd800) {
			/*
			 * High surrogate that exactly fills the budget: a valid
			 * pair would need a fourth byte, so it cannot be kept.
			 * Stop here without peeking at the following code unit.
			 */
			if (total + nbytes == maxlen)
				break;

			/* Valid pair: consume the low half, one more byte. */
			if ((*s16 & 0xfc00) == 0xdc00) {
				s16++;
				nbytes++;
			}
		}

		total += nbytes;
	}

	return total;
}
272
+
273
/*
 * utf16_to_utf32() - decode one character from a UTF-16 stream
 * @s16: in/out cursor into the UTF-16 data
 *
 * Reads one code unit, and a second one only when the first is a high
 * surrogate. *@s16 is advanced past one unit, or past two when a valid
 * surrogate pair was decoded; an unpaired high surrogate leaves the following
 * unit unconsumed.
 *
 * Returns the decoded code point, or U+FFFD for a lone low surrogate or a
 * high surrogate that is not followed by a low surrogate.
 */
static
u32 utf16_to_utf32(const u16 **s16)
{
	const u16 *cursor = *s16;
	const u16 lead = *cursor++;
	u32 codepoint;

	if ((lead & 0xf800) != 0xd800) {
		/* Outside the surrogate range: the unit is the code point. */
		codepoint = lead;
	} else if ((lead & 0x0400) != 0) {
		/* A low surrogate cannot start a character. */
		codepoint = 0xfffd;
	} else if ((*cursor & 0xfc00) != 0xdc00) {
		/* High surrogate without its partner; leave the next unit. */
		codepoint = 0xfffd;
	} else {
		/* Well-formed pair: 10 bits from each half above 0x10000. */
		const u16 trail = *cursor++;

		codepoint = 0x10000 + ((u32)(lead - 0xd800) << 10) +
			    (u32)(trail - 0xdc00);
	}

	*s16 = cursor;
	return codepoint;
}
293
+
241
294
#define PUTC (c ) \
242
295
do { \
243
296
if (pos < size) \
@@ -325,18 +378,31 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list ap)
325
378
switch (* fmt ) {
326
379
case 'c' :
327
380
flags &= LEFT ;
328
- tmp [0 ] = (unsigned char )va_arg (args , int );
329
381
s = tmp ;
330
- precision = len = 1 ;
382
+ if (qualifier == 'l' ) {
383
+ ((u16 * )tmp )[0 ] = (u16 )va_arg (args , unsigned int );
384
+ ((u16 * )tmp )[1 ] = L'\0' ;
385
+ precision = INT_MAX ;
386
+ goto wstring ;
387
+ } else {
388
+ tmp [0 ] = (unsigned char )va_arg (args , int );
389
+ precision = len = 1 ;
390
+ }
331
391
goto output ;
332
392
333
393
case 's' :
334
394
flags &= LEFT ;
335
395
if (precision < 0 )
336
396
precision = INT_MAX ;
337
- s = va_arg (args , char * );
397
+ s = va_arg (args , void * );
338
398
if (!s )
339
399
s = precision < 6 ? "" : "(null)" ;
400
+ else if (qualifier == 'l' ) {
401
+ wstring :
402
+ flags |= WIDE ;
403
+ precision = len = utf16s_utf8nlen ((const u16 * )s , precision );
404
+ goto output ;
405
+ }
340
406
precision = len = strnlen (s , precision );
341
407
goto output ;
342
408
@@ -436,8 +502,43 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list ap)
436
502
while (precision -- > len )
437
503
PUTC ('0' );
438
504
/* Actual output */
439
- while (len -- > 0 )
440
- PUTC (* s ++ );
505
+ if (flags & WIDE ) {
506
+ const u16 * ws = (const u16 * )s ;
507
+
508
+ while (len -- > 0 ) {
509
+ u32 c32 = utf16_to_utf32 (& ws );
510
+ u8 * s8 ;
511
+ size_t clen ;
512
+
513
+ if (c32 < 0x80 ) {
514
+ PUTC (c32 );
515
+ continue ;
516
+ }
517
+
518
+ /* Number of trailing octets */
519
+ clen = 1 + (c32 >= 0x800 ) + (c32 >= 0x10000 );
520
+
521
+ len -= clen ;
522
+ s8 = (u8 * )& buf [pos ];
523
+
524
+ /* Avoid writing partial character */
525
+ PUTC ('\0' );
526
+ pos += clen ;
527
+ if (pos >= size )
528
+ continue ;
529
+
530
+ /* Set high bits of leading octet */
531
+ * s8 = (0xf00 >> 1 ) >> clen ;
532
+ /* Write trailing octets in reverse order */
533
+ for (s8 += clen ; clen ; -- clen , c32 >>= 6 )
534
+ * s8 -- = 0x80 | (c32 & 0x3f );
535
+ /* Set low bits of leading octet */
536
+ * s8 |= c32 ;
537
+ }
538
+ } else {
539
+ while (len -- > 0 )
540
+ PUTC (* s ++ );
541
+ }
441
542
/* Trailing padding with ' ' */
442
543
while (field_width -- > 0 )
443
544
PUTC (' ' );
0 commit comments