@@ -317,12 +317,8 @@ int mingw_raise(int sig);
317
317
* ANSI emulation wrappers
318
318
*/
319
319
320
- int winansi_fputs (const char * str , FILE * stream );
321
- int winansi_printf (const char * format , ...) __attribute__((format (printf , 1 , 2 )));
322
- int winansi_fprintf (FILE * stream , const char * format , ...) __attribute__((format (printf , 2 , 3 )));
323
- #define fputs winansi_fputs
324
- #define printf (...) winansi_printf(__VA_ARGS__)
325
- #define fprintf (...) winansi_fprintf(__VA_ARGS__)
320
+ void winansi_init (void );
321
+ HANDLE winansi_get_osfhandle (int fd );
326
322
327
323
/*
328
324
* git specific compatibility
@@ -355,6 +351,110 @@ void mingw_open_html(const char *path);
355
351
char * * make_augmented_environ (const char * const * vars );
356
352
void free_environ (char * * env );
357
353
354
+ /**
355
+ * Converts UTF-8 encoded string to UTF-16LE.
356
+ *
357
+ * To support repositories with legacy-encoded file names, invalid UTF-8 bytes
358
+ * 0xa0 - 0xff are converted to corresponding printable Unicode chars \u00a0 -
359
+ * \u00ff, and invalid UTF-8 bytes 0x80 - 0x9f (which would make non-printable
360
+ * Unicode) are converted to hex-code.
361
+ *
362
+ * Lead-bytes not followed by an appropriate number of trail-bytes, over-long
363
+ * encodings and 4-byte encodings > \u10ffff are detected as invalid UTF-8.
364
+ *
365
+ * Maximum space requirement for the target buffer is two wide chars per UTF-8
366
+ * char (((strlen(utf) * 2) + 1) [* sizeof(wchar_t)]).
367
+ *
368
+ * The maximum space is needed only if the entire input string consists of
369
+ * invalid UTF-8 bytes in range 0x80-0x9f, as per the following table:
370
+ *
371
+ * | | UTF-8 | UTF-16 |
372
+ * Code point | UTF-8 sequence | bytes | words | ratio
373
+ * --------------+-------------------+-------+--------+-------
374
+ * 000000-00007f | 0-7f | 1 | 1 | 1
375
+ * 000080-0007ff | c2-df + 80-bf | 2 | 1 | 0.5
376
+ * 000800-00ffff | e0-ef + 2 * 80-bf | 3 | 1 | 0.33
377
+ * 010000-10ffff | f0-f4 + 3 * 80-bf | 4 | 2 (a) | 0.5
378
+ * invalid | 80-9f | 1 | 2 (b) | 2
379
+ * invalid | a0-ff | 1 | 1 | 1
380
+ *
381
+ * (a) encoded as UTF-16 surrogate pair
382
+ * (b) encoded as two hex digits
383
+ *
384
+ * Note that, while the UTF-8 encoding scheme can be extended to 5-byte, 6-byte
385
+ * or even indefinite-byte sequences, the largest valid code point \u10ffff
386
+ * encodes as only 4 UTF-8 bytes.
387
+ *
388
+ * Parameters:
389
+ * wcs: wide char target buffer
390
+ * utf: string to convert
391
+ * wcslen: size of target buffer (in wchar_t's)
392
+ * utflen: size of string to convert, or -1 if 0-terminated
393
+ *
394
+ * Returns:
395
+ * length of converted string (_wcslen(wcs)), or -1 on failure
396
+ *
397
+ * Errors:
398
+ * EINVAL: one of the input parameters is invalid (e.g. NULL)
399
+ * ERANGE: the output buffer is too small
400
+ */
401
+ int xutftowcsn (wchar_t * wcs , const char * utf , size_t wcslen , int utflen );
402
+
403
/**
 * Convenience wrapper around xutftowcsn for 0-terminated input.
 *
 * Forwards wcs, utf and wcslen unchanged and passes -1 as the input length,
 * which xutftowcsn documents as "string is 0-terminated". The return value
 * of xutftowcsn is handed back as is.
 */
static inline int xutftowcs(wchar_t *wcs, const char *utf, size_t wcslen)
{
	/* -1 tells xutftowcsn to treat utf as a 0-terminated string */
	const int utflen = -1;

	return xutftowcsn(wcs, utf, wcslen, utflen);
}
410
+
411
+ /**
412
+ * Simplified file system specific variant of xutftowcsn, assumes output
413
+ * buffer size is MAX_PATH wide chars and input string is \0-terminated,
414
+ * fails with ENAMETOOLONG if input string is too long.
415
+ */
416
+ static inline int xutftowcs_path (wchar_t * wcs , const char * utf )
417
+ {
418
+ int result = xutftowcsn (wcs , utf , MAX_PATH , -1 );
419
+ if (result < 0 && errno == ERANGE )
420
+ errno = ENAMETOOLONG ;
421
+ return result ;
422
+ }
423
+
424
+ /**
425
+ * Converts UTF-16LE encoded string to UTF-8.
426
+ *
427
+ * Maximum space requirement for the target buffer is three UTF-8 chars per
428
+ * wide char ((_wcslen(wcs) * 3) + 1).
429
+ *
430
+ * The maximum space is needed only if the entire input string consists of
431
+ * UTF-16 words in range 0x0800-0xd7ff or 0xe000-0xffff (i.e. \u0800-\uffff
432
+ * modulo surrogate pairs), as per the following table:
433
+ *
434
+ * | | UTF-16 | UTF-8 |
435
+ * Code point | UTF-16 sequence | words | bytes | ratio
436
+ * --------------+-----------------------+--------+-------+-------
437
+ * 000000-00007f | 0000-007f | 1 | 1 | 1
438
+ * 000080-0007ff | 0080-07ff | 1 | 2 | 2
439
+ * 000800-00ffff | 0800-d7ff / e000-ffff | 1 | 3 | 3
440
+ * 010000-10ffff | d800-dbff + dc00-dfff | 2 | 4 | 2
441
+ *
442
+ * Note that invalid code points > 10ffff cannot be represented in UTF-16.
443
+ *
444
+ * Parameters:
445
+ * utf: target buffer
446
+ * wcs: wide string to convert
447
+ * utflen: size of target buffer
448
+ *
449
+ * Returns:
450
+ * length of converted string, or -1 on failure
451
+ *
452
+ * Errors:
453
+ * EINVAL: one of the input parameters is invalid (e.g. NULL)
454
+ * ERANGE: the output buffer is too small
455
+ */
456
+ int xwcstoutf (char * utf , const wchar_t * wcs , size_t utflen );
457
+
358
458
/*
359
459
* A critical section used in the implementation of the spawn
360
460
* functions (mingw_spawnv[p]e()) and waitpid(). Initialised in
0 commit comments