Skip to content
This repository was archived by the owner on Nov 9, 2017. It is now read-only.

Commit dc63934

Browse files
committed
Merge 'unicode' into HEAD
2 parents 6ac4a73 + cd0792a commit dc63934

File tree

13 files changed

+1049
-454
lines changed

13 files changed

+1049
-454
lines changed

compat/mingw.c

Lines changed: 493 additions & 214 deletions
Large diffs are not rendered by default.

compat/mingw.h

Lines changed: 128 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,7 @@ static inline int fcntl(int fd, int cmd, ...)
118118
* simple adaptors
119119
*/
120120

121-
static inline int mingw_mkdir(const char *path, int mode)
122-
{
123-
return mkdir(path);
124-
}
121+
int mingw_mkdir(const char *path, int mode);
125122
#define mkdir mingw_mkdir
126123

127124
#define WNOHANG 1
@@ -192,11 +189,27 @@ FILE *mingw_freopen (const char *filename, const char *otype, FILE *stream);
192189
int mingw_fflush(FILE *stream);
193190
#define fflush mingw_fflush
194191

192+
int mingw_access(const char *filename, int mode);
193+
#undef access
194+
#define access mingw_access
195+
196+
int mingw_chdir(const char *dirname);
197+
#define chdir mingw_chdir
198+
199+
int mingw_chmod(const char *filename, int mode);
200+
#define chmod mingw_chmod
201+
202+
char *mingw_mktemp(char *template);
203+
#define mktemp mingw_mktemp
204+
195205
char *mingw_getcwd(char *pointer, int len);
196206
#define getcwd mingw_getcwd
197207

198208
char *mingw_getenv(const char *name);
199209
#define getenv mingw_getenv
210+
int mingw_putenv(const char *namevalue);
211+
#define putenv mingw_putenv
212+
#define unsetenv mingw_putenv
200213

201214
int mingw_gethostname(char *host, int namelen);
202215
#define gethostname mingw_gethostname
@@ -317,12 +330,8 @@ int mingw_raise(int sig);
317330
* ANSI emulation wrappers
318331
*/
319332

320-
int winansi_fputs(const char *str, FILE *stream);
321-
int winansi_printf(const char *format, ...) __attribute__((format (printf, 1, 2)));
322-
int winansi_fprintf(FILE *stream, const char *format, ...) __attribute__((format (printf, 2, 3)));
323-
#define fputs winansi_fputs
324-
#define printf(...) winansi_printf(__VA_ARGS__)
325-
#define fprintf(...) winansi_fprintf(__VA_ARGS__)
333+
void winansi_init(void);
334+
HANDLE winansi_get_osfhandle(int fd);
326335

327336
/*
328337
* git specific compatibility
@@ -348,12 +357,112 @@ int mingw_offset_1st_component(const char *path);
348357
void mingw_open_html(const char *path);
349358
#define open_html mingw_open_html
350359

351-
/*
352-
* helpers
360+
void mingw_mark_as_git_dir(const char *dir);
361+
#define mark_as_git_dir mingw_mark_as_git_dir
362+
363+
/**
364+
* Converts UTF-8 encoded string to UTF-16LE.
365+
*
366+
* To support repositories with legacy-encoded file names, invalid UTF-8 bytes
367+
* 0xa0 - 0xff are converted to corresponding printable Unicode chars \u00a0 -
368+
* \u00ff, and invalid UTF-8 bytes 0x80 - 0x9f (which would make non-printable
369+
* Unicode) are converted to hex-code.
370+
*
371+
* Lead-bytes not followed by an appropriate number of trail-bytes, over-long
372+
* encodings and 4-byte encodings > \u10ffff are detected as invalid UTF-8.
373+
*
374+
* Maximum space requirement for the target buffer is two wide chars per input
375+
* byte (((strlen(utf) * 2) + 1) [* sizeof(wchar_t)]).
376+
*
377+
* The maximum space is needed only if the entire input string consists of
378+
* invalid UTF-8 bytes in range 0x80-0x9f, as per the following table:
379+
*
380+
* | | UTF-8 | UTF-16 |
381+
* Code point | UTF-8 sequence | bytes | words | ratio
382+
* --------------+-------------------+-------+--------+-------
383+
* 000000-00007f | 0-7f | 1 | 1 | 1
384+
* 000080-0007ff | c2-df + 80-bf | 2 | 1 | 0.5
385+
* 000800-00ffff | e0-ef + 2 * 80-bf | 3 | 1 | 0.33
386+
* 010000-10ffff | f0-f4 + 3 * 80-bf | 4 | 2 (a) | 0.5
387+
* invalid | 80-9f | 1 | 2 (b) | 2
388+
* invalid | a0-ff | 1 | 1 | 1
389+
*
390+
* (a) encoded as UTF-16 surrogate pair
391+
* (b) encoded as two hex digits
392+
*
393+
* Note that, while the UTF-8 encoding scheme can be extended to 5-byte, 6-byte
394+
* or even indefinite-byte sequences, the largest valid code point \u10ffff
395+
* encodes as only 4 UTF-8 bytes.
396+
*
397+
* Parameters:
398+
* wcs: wide char target buffer
399+
* utf: string to convert
400+
* wcslen: size of target buffer (in wchar_t's)
401+
* utflen: size of string to convert, or -1 if 0-terminated
402+
*
403+
* Returns:
404+
* length of converted string (_wcslen(wcs)), or -1 on failure
405+
*
406+
* Errors:
407+
* EINVAL: one of the input parameters is invalid (e.g. NULL)
408+
* ERANGE: the output buffer is too small
409+
*/
410+
int xutftowcsn(wchar_t *wcs, const char *utf, size_t wcslen, int utflen);
411+
412+
/**
413+
* Simplified variant of xutftowcsn, assumes input string is \0-terminated.
*
* Passes -1 as utflen, which xutftowcsn documents as "0-terminated".
*
* Parameters:
* wcs: wide char target buffer
* utf: \0-terminated string to convert
* wcslen: size of target buffer (in wchar_t's)
*
* Returns:
* the result of xutftowcsn (length of converted string, or -1 on failure)
414+
*/
415+
static inline int xutftowcs(wchar_t *wcs, const char *utf, size_t wcslen)
416+
{
417+
return xutftowcsn(wcs, utf, wcslen, -1);
418+
}
419+
420+
/**
421+
* Simplified file system specific variant of xutftowcsn, assumes output
422+
* buffer size is MAX_PATH wide chars and input string is \0-terminated,
423+
* fails with ENAMETOOLONG if input string is too long.
*
* Parameters:
* wcs: wide char target buffer, at least MAX_PATH wchar_t's
* utf: \0-terminated string to convert
*
* Returns:
* the result of xutftowcsn (length of converted string, or -1 on failure)
*
* Errors:
* ENAMETOOLONG: replaces the ERANGE reported by xutftowcsn
* any other errno value set by xutftowcsn is left untouched
353424
*/
425+
static inline int xutftowcs_path(wchar_t *wcs, const char *utf)
426+
{
427+
int result = xutftowcsn(wcs, utf, MAX_PATH, -1);
428+
/* a too-small buffer here means the path exceeds MAX_PATH wide chars */
if (result < 0 && errno == ERANGE)
429+
errno = ENAMETOOLONG;
430+
return result;
431+
}
354432

355-
char **make_augmented_environ(const char *const *vars);
356-
void free_environ(char **env);
433+
/**
434+
* Converts UTF-16LE encoded string to UTF-8.
435+
*
436+
* Maximum space requirement for the target buffer is three UTF-8 bytes per
436+
* wide char ((_wcslen(wcs) * 3) + 1).
438+
*
439+
* The maximum space is needed only if the entire input string consists of
440+
* UTF-16 words in range 0x0800-0xd7ff or 0xe000-0xffff (i.e. \u0800-\uffff
441+
* modulo surrogate pairs), as per the following table:
442+
*
443+
* | | UTF-16 | UTF-8 |
444+
* Code point | UTF-16 sequence | words | bytes | ratio
445+
* --------------+-----------------------+--------+-------+-------
446+
* 000000-00007f | 0000-007f | 1 | 1 | 1
447+
* 000080-0007ff | 0080-07ff | 1 | 2 | 2
448+
* 000800-00ffff | 0800-d7ff / e000-ffff | 1 | 3 | 3
449+
* 010000-10ffff | d800-dbff + dc00-dfff | 2 | 4 | 2
450+
*
451+
* Note that invalid code points > 10ffff cannot be represented in UTF-16.
452+
*
453+
* Parameters:
454+
* utf: target buffer
455+
* wcs: wide string to convert
456+
* utflen: size of target buffer
457+
*
458+
* Returns:
459+
* length of converted string, or -1 on failure
460+
*
461+
* Errors:
462+
* EINVAL: one of the input parameters is invalid (e.g. NULL)
463+
* ERANGE: the output buffer is too small
464+
*/
465+
int xwcstoutf(char *utf, const wchar_t *wcs, size_t utflen);
357466

358467
/*
359468
* A critical section used in the implementation of the spawn
@@ -363,22 +472,16 @@ void free_environ(char **env);
363472
extern CRITICAL_SECTION pinfo_cs;
364473

365474
/*
366-
* A replacement of main() that ensures that argv[0] has a path
367-
* and that default fmode and std(in|out|err) are in binary mode
475+
* A replacement of main() that adds win32 specific initialization.
368476
*/
369477

478+
/*
 * Win32 specific startup hook: the main(c,v) replacement macro below calls
 * it before handing control to mingw_main().
 *
 * Declared with an explicit (void) parameter list so that this is a real
 * prototype (an empty list leaves the arguments unchecked in C), matching
 * winansi_init(void).
 */
void mingw_startup(void);
370479
#define main(c,v) dummy_decl_mingw_main(); \
371480
static int mingw_main(c,v); \
372-
int main(int argc, char **argv) \
481+
int main(c,v) \
373482
{ \
374-
extern CRITICAL_SECTION pinfo_cs; \
375-
_fmode = _O_BINARY; \
376-
_setmode(_fileno(stdin), _O_BINARY); \
377-
_setmode(_fileno(stdout), _O_BINARY); \
378-
_setmode(_fileno(stderr), _O_BINARY); \
379-
argv[0] = xstrdup(_pgmptr); \
380-
InitializeCriticalSection(&pinfo_cs); \
381-
return mingw_main(argc, argv); \
483+
mingw_startup(); \
484+
return mingw_main(__argc, __argv); \
382485
} \
383486
static int mingw_main(c,v)
384487

compat/win32/dirent.c

Lines changed: 50 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,81 @@
1-
#include "../git-compat-util.h"
2-
#include "dirent.h"
1+
#include "../../git-compat-util.h"
32

43
struct DIR {
54
struct dirent dd_dir; /* includes d_type */
65
HANDLE dd_handle; /* FindFirstFile handle */
76
int dd_stat; /* 0-based index */
8-
char dd_name[1]; /* extend struct */
97
};
108

9+
/*
 * Fills in a dirent from one WIN32_FIND_DATAW record: d_name receives the
 * file name converted from UTF-16 to UTF-8, d_type becomes DT_DIR if the
 * directory attribute is set and DT_REG otherwise.
 *
 * NOTE(review): the result of xwcstoutf is not checked here - presumably
 * d_name is sized so that the conversion cannot fail; confirm against the
 * d_name declaration in dirent.h.
 */
static inline void finddata2dirent(struct dirent *ent, WIN32_FIND_DATAW *fdata)
10+
{
11+
/* convert UTF-16 name to UTF-8 */
12+
xwcstoutf(ent->d_name, fdata->cFileName, sizeof(ent->d_name));
13+
14+
/* Set file type, based on WIN32_FIND_DATA */
15+
if (fdata->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
16+
ent->d_type = DT_DIR;
17+
else
18+
ent->d_type = DT_REG;
19+
}
20+
1121
DIR *opendir(const char *name)
1222
{
13-
DWORD attrs = GetFileAttributesA(name);
23+
wchar_t pattern[MAX_PATH + 2]; /* + 2 for '/' '*' */
24+
WIN32_FIND_DATAW fdata;
25+
HANDLE h;
1426
int len;
15-
DIR *p;
27+
DIR *dir;
1628

17-
/* check for valid path */
18-
if (attrs == INVALID_FILE_ATTRIBUTES) {
19-
errno = ENOENT;
29+
/* convert name to UTF-16 and check length < MAX_PATH */
30+
if ((len = xutftowcs_path(pattern, name)) < 0)
2031
return NULL;
21-
}
2232

23-
/* check if it's a directory */
24-
if (!(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
25-
errno = ENOTDIR;
33+
/* append optional '/' and wildcard '*' */
34+
if (len && !is_dir_sep(pattern[len - 1]))
35+
pattern[len++] = '/';
36+
pattern[len++] = '*';
37+
pattern[len] = 0;
38+
39+
/* open find handle */
40+
h = FindFirstFileW(pattern, &fdata);
41+
if (h == INVALID_HANDLE_VALUE) {
42+
DWORD err = GetLastError();
43+
errno = (err == ERROR_DIRECTORY) ? ENOTDIR : err_win_to_posix(err);
2644
return NULL;
2745
}
2846

29-
/* check that the pattern won't be too long for FindFirstFileA */
30-
len = strlen(name);
31-
if (is_dir_sep(name[len - 1]))
32-
len--;
33-
if (len + 2 >= MAX_PATH) {
34-
errno = ENAMETOOLONG;
35-
return NULL;
36-
}
37-
38-
p = malloc(sizeof(DIR) + len + 2);
39-
if (!p)
40-
return NULL;
41-
42-
memset(p, 0, sizeof(DIR) + len + 2);
43-
strcpy(p->dd_name, name);
44-
p->dd_name[len] = '/';
45-
p->dd_name[len+1] = '*';
46-
47-
p->dd_handle = INVALID_HANDLE_VALUE;
48-
return p;
47+
/* initialize DIR structure and copy first dir entry */
48+
dir = xmalloc(sizeof(DIR));
49+
dir->dd_handle = h;
50+
dir->dd_stat = 0;
51+
finddata2dirent(&dir->dd_dir, &fdata);
52+
return dir;
4953
}
5054

5155
struct dirent *readdir(DIR *dir)
5256
{
53-
WIN32_FIND_DATAA buf;
54-
HANDLE handle;
55-
56-
if (!dir || !dir->dd_handle) {
57+
if (!dir) {
5758
errno = EBADF; /* No set_errno for mingw */
5859
return NULL;
5960
}
6061

61-
if (dir->dd_handle == INVALID_HANDLE_VALUE && dir->dd_stat == 0) {
62-
DWORD lasterr;
63-
handle = FindFirstFileA(dir->dd_name, &buf);
64-
lasterr = GetLastError();
65-
dir->dd_handle = handle;
66-
if (handle == INVALID_HANDLE_VALUE && (lasterr != ERROR_NO_MORE_FILES)) {
67-
errno = err_win_to_posix(lasterr);
62+
/* if first entry, dirent has already been set up by opendir */
63+
if (dir->dd_stat) {
64+
/* get next entry and convert from WIN32_FIND_DATA to dirent */
65+
WIN32_FIND_DATAW fdata;
66+
if (FindNextFileW(dir->dd_handle, &fdata)) {
67+
finddata2dirent(&dir->dd_dir, &fdata);
68+
} else {
69+
DWORD lasterr = GetLastError();
70+
/* POSIX says errno must be left unchanged when readdir reaches the
71+
end of the directory, so we only set it for any other error. */
72+
if (lasterr != ERROR_NO_MORE_FILES)
73+
errno = err_win_to_posix(lasterr);
6874
return NULL;
6975
}
70-
} else if (dir->dd_handle == INVALID_HANDLE_VALUE) {
71-
return NULL;
72-
} else if (!FindNextFileA(dir->dd_handle, &buf)) {
73-
DWORD lasterr = GetLastError();
74-
FindClose(dir->dd_handle);
75-
dir->dd_handle = INVALID_HANDLE_VALUE;
76-
/* POSIX says you shouldn't set errno when readdir can't
77-
find any more files; so, if another error we leave it set. */
78-
if (lasterr != ERROR_NO_MORE_FILES)
79-
errno = err_win_to_posix(lasterr);
80-
return NULL;
8176
}
8277

83-
/* We get here if `buf' contains valid data. */
84-
strcpy(dir->dd_dir.d_name, buf.cFileName);
8578
++dir->dd_stat;
86-
87-
/* Set file type, based on WIN32_FIND_DATA */
88-
dir->dd_dir.d_type = 0;
89-
if (buf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
90-
dir->dd_dir.d_type |= DT_DIR;
91-
else
92-
dir->dd_dir.d_type |= DT_REG;
93-
9479
return &dir->dd_dir;
9580
}
9681

@@ -101,8 +86,7 @@ int closedir(DIR *dir)
10186
return -1;
10287
}
10388

104-
if (dir->dd_handle != INVALID_HANDLE_VALUE)
105-
FindClose(dir->dd_handle);
89+
FindClose(dir->dd_handle);
10690
free(dir);
10791
return 0;
10892
}

compat/win32/dirent.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,8 @@ typedef struct DIR DIR;
99
#define DT_LNK 3
1010

1111
struct dirent {
12-
long d_ino; /* Always zero. */
13-
char d_name[FILENAME_MAX]; /* File name. */
14-
union {
15-
unsigned short d_reclen; /* Always zero. */
16-
unsigned char d_type; /* Reimplementation adds this */
17-
};
12+
unsigned char d_type; /* file type to prevent lstat after readdir */
13+
char d_name[MAX_PATH * 3]; /* file name (* 3 for UTF-8 conversion) */
1814
};
1915

2016
DIR *opendir(const char *dirname);

0 commit comments

Comments
 (0)