@@ -317,12 +317,8 @@ int mingw_raise(int sig);
317317 * ANSI emulation wrappers
318318 */
319319
320- int winansi_fputs (const char * str , FILE * stream );
321- int winansi_printf (const char * format , ...) __attribute__((format (printf , 1 , 2 )));
322- int winansi_fprintf (FILE * stream , const char * format , ...) __attribute__((format (printf , 2 , 3 )));
323- #define fputs winansi_fputs
324- #define printf (...) winansi_printf(__VA_ARGS__)
325- #define fprintf (...) winansi_fprintf(__VA_ARGS__)
320+ void winansi_init (void );
321+ HANDLE winansi_get_osfhandle (int fd );
326322
327323/*
328324 * git specific compatibility
@@ -355,6 +351,110 @@ void mingw_open_html(const char *path);
355351char * * make_augmented_environ (const char * const * vars );
356352void free_environ (char * * env );
357353
354+ /**
355+ * Converts UTF-8 encoded string to UTF-16LE.
356+ *
357+ * To support repositories with legacy-encoded file names, invalid UTF-8 bytes
358+ * 0xa0 - 0xff are converted to corresponding printable Unicode chars \u00a0 -
359+ * \u00ff, and invalid UTF-8 bytes 0x80 - 0x9f (which would make non-printable
360+ * Unicode) are converted to hex-code.
361+ *
362+ * Lead-bytes not followed by an appropriate number of trail-bytes, over-long
363+ * encodings and 4-byte encodings > \u10ffff are detected as invalid UTF-8.
364+ *
365+ * Maximum space requirement for the target buffer is two wide chars per UTF-8
366+ * char (((strlen(utf) * 2) + 1) [* sizeof(wchar_t)]).
367+ *
368+ * The maximum space is needed only if the entire input string consists of
369+ * invalid UTF-8 bytes in range 0x80-0x9f, as per the following table:
370+ *
371+ * | | UTF-8 | UTF-16 |
372+ * Code point | UTF-8 sequence | bytes | words | ratio
373+ * --------------+-------------------+-------+--------+-------
374+ * 000000-00007f | 0-7f | 1 | 1 | 1
375+ * 000080-0007ff | c2-df + 80-bf | 2 | 1 | 0.5
376+ * 000800-00ffff | e0-ef + 2 * 80-bf | 3 | 1 | 0.33
377+ * 010000-10ffff | f0-f4 + 3 * 80-bf | 4 | 2 (a) | 0.5
378+ * invalid | 80-9f | 1 | 2 (b) | 2
379+ * invalid | a0-ff | 1 | 1 | 1
380+ *
381+ * (a) encoded as UTF-16 surrogate pair
382+ * (b) encoded as two hex digits
383+ *
384+ * Note that, while the UTF-8 encoding scheme can be extended to 5-byte, 6-byte
385+ * or even indefinite-byte sequences, the largest valid code point \u10ffff
386+ * encodes as only 4 UTF-8 bytes.
387+ *
388+ * Parameters:
389+ * wcs: wide char target buffer
390+ * utf: string to convert
391+ * wcslen: size of target buffer (in wchar_t's)
392+ * utflen: size of string to convert, or -1 if 0-terminated
393+ *
394+ * Returns:
395+ * length of converted string (_wcslen(wcs)), or -1 on failure
396+ *
397+ * Errors:
398+ * EINVAL: one of the input parameters is invalid (e.g. NULL)
399+ * ERANGE: the output buffer is too small
400+ */
401+ int xutftowcsn (wchar_t * wcs , const char * utf , size_t wcslen , int utflen );
402+
/**
 * Convenience wrapper around xutftowcsn for \0-terminated input.
 *
 * Forwards wcs, utf and wcslen unchanged and passes -1 as the input length,
 * which xutftowcsn documents as "string is 0-terminated". The return value
 * of xutftowcsn is handed back to the caller as-is.
 */
static inline int xutftowcs(wchar_t *wcs, const char *utf, size_t wcslen)
{
	const int utflen = -1; /* -1: let xutftowcsn find the terminator */

	return xutftowcsn(wcs, utf, wcslen, utflen);
}
410+
411+ /**
412+ * Simplified file system specific variant of xutftowcsn, assumes output
413+ * buffer size is MAX_PATH wide chars and input string is \0-terminated,
414+ * fails with ENAMETOOLONG if input string is too long.
415+ */
416+ static inline int xutftowcs_path (wchar_t * wcs , const char * utf )
417+ {
418+ int result = xutftowcsn (wcs , utf , MAX_PATH , -1 );
419+ if (result < 0 && errno == ERANGE )
420+ errno = ENAMETOOLONG ;
421+ return result ;
422+ }
423+
424+ /**
425+ * Converts UTF-16LE encoded string to UTF-8.
426+ *
427+ * Maximum space requirement for the target buffer is three UTF-8 chars per
428+ * wide char ((_wcslen(wcs) * 3) + 1).
429+ *
430+ * The maximum space is needed only if the entire input string consists of
431+ * UTF-16 words in range 0x0800-0xd7ff or 0xe000-0xffff (i.e. \u0800-\uffff
432+ * modulo surrogate pairs), as per the following table:
433+ *
434+ * | | UTF-16 | UTF-8 |
435+ * Code point | UTF-16 sequence | words | bytes | ratio
436+ * --------------+-----------------------+--------+-------+-------
437+ * 000000-00007f | 0000-007f | 1 | 1 | 1
438+ * 000080-0007ff | 0080-07ff | 1 | 2 | 2
439+ * 000800-00ffff | 0800-d7ff / e000-ffff | 1 | 3 | 3
440+ * 010000-10ffff | d800-dbff + dc00-dfff | 2 | 4 | 2
441+ *
442+ * Note that invalid code points > 10ffff cannot be represented in UTF-16.
443+ *
444+ * Parameters:
445+ * utf: target buffer
446+ * wcs: wide string to convert
447+ * utflen: size of target buffer
448+ *
449+ * Returns:
450+ * length of converted string, or -1 on failure
451+ *
452+ * Errors:
453+ * EINVAL: one of the input parameters is invalid (e.g. NULL)
454+ * ERANGE: the output buffer is too small
455+ */
456+ int xwcstoutf (char * utf , const wchar_t * wcs , size_t utflen );
457+
358458/*
359459 * A critical section used in the implementation of the spawn
360460 * functions (mingw_spawnv[p]e()) and waitpid(). Initialised in
0 commit comments