Skip to content

Commit 0474cd1

Browse files
committed
Merge branch 'js/mingw-utf8-env'
Windows fix.

* js/mingw-utf8-env:
  mingw: reencode environment variables on the fly (UTF-16 <-> UTF-8)
  t7800: fix quoting
2 parents 6c268fd + fe21c6b commit 0474cd1

File tree

3 files changed

+197
-117
lines changed

3 files changed

+197
-117
lines changed

compat/mingw.c

Lines changed: 168 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,44 +1171,121 @@ static char *path_lookup(const char *cmd, int exe_only)
11711171
return prog;
11721172
}
11731173

1174-
static int do_putenv(char **env, const char *name, int size, int free_old);
1174+
/*
 * Wide-character counterpart of strchrnul(): return a pointer to the first
 * occurrence of `c` in `s`, or to the terminating NUL of `s` when `c` does
 * not occur in it.
 */
static const wchar_t *wcschrnul(const wchar_t *s, wchar_t c)
{
	for (; *s; s++)
		if (*s == c)
			break;
	return s;
}
1180+
1181+
/* Compare only keys */
1182+
static int wenvcmp(const void *a, const void *b)
1183+
{
1184+
wchar_t *p = *(wchar_t **)a, *q = *(wchar_t **)b;
1185+
size_t p_len, q_len;
1186+
1187+
/* Find the keys */
1188+
p_len = wcschrnul(p, L'=') - p;
1189+
q_len = wcschrnul(q, L'=') - q;
11751190

1176-
/* used number of elements of environ array, including terminating NULL */
1177-
static int environ_size = 0;
1178-
/* allocated size of environ array, in bytes */
1179-
static int environ_alloc = 0;
1191+
/* If the length differs, include the shorter key's NUL */
1192+
if (p_len < q_len)
1193+
p_len++;
1194+
else if (p_len > q_len)
1195+
p_len = q_len + 1;
1196+
1197+
return _wcsnicmp(p, q, p_len);
1198+
}
1199+
1200+
/* We need a stable sort to convert the environment between UTF-16 <-> UTF-8 */
1201+
#ifndef INTERNAL_QSORT
1202+
#include "qsort.c"
1203+
#endif
11801204

11811205
/*
1182-
* Create environment block suitable for CreateProcess. Merges current
1183-
* process environment and the supplied environment changes.
1206+
* Build an environment block combining the inherited environment
1207+
* merged with the given list of settings.
1208+
*
1209+
* Values of the form "KEY=VALUE" in deltaenv override inherited values.
1210+
* Values of the form "KEY" in deltaenv delete inherited values.
1211+
*
1212+
* Multiple entries in deltaenv for the same key are explicitly allowed.
1213+
*
1214+
* We return a contiguous block of UNICODE strings with a final trailing
1215+
* zero word.
11841216
*/
11851217
static wchar_t *make_environment_block(char **deltaenv)
11861218
{
1187-
wchar_t *wenvblk = NULL;
1188-
char **tmpenv;
1189-
int i = 0, size = environ_size, wenvsz = 0, wenvpos = 0;
1219+
wchar_t *wenv = GetEnvironmentStringsW(), *wdeltaenv, *result, *p;
1220+
size_t wlen, s, delta_size, size;
11901221

1191-
while (deltaenv && deltaenv[i])
1192-
i++;
1222+
wchar_t **array = NULL;
1223+
size_t alloc = 0, nr = 0, i;
11931224

1194-
/* copy the environment, leaving space for changes */
1195-
ALLOC_ARRAY(tmpenv, size + i);
1196-
memcpy(tmpenv, environ, size * sizeof(char*));
1225+
size = 1; /* for extra NUL at the end */
1226+
1227+
/* If there is no deltaenv to apply, simply return a copy. */
1228+
if (!deltaenv || !*deltaenv) {
1229+
for (p = wenv; p && *p; ) {
1230+
size_t s = wcslen(p) + 1;
1231+
size += s;
1232+
p += s;
1233+
}
11971234

1198-
/* merge supplied environment changes into the temporary environment */
1199-
for (i = 0; deltaenv && deltaenv[i]; i++)
1200-
size = do_putenv(tmpenv, deltaenv[i], size, 0);
1235+
ALLOC_ARRAY(result, size);
1236+
memcpy(result, wenv, size * sizeof(*wenv));
1237+
FreeEnvironmentStringsW(wenv);
1238+
return result;
1239+
}
12011240

1202-
/* create environment block from temporary environment */
1203-
for (i = 0; tmpenv[i]; i++) {
1204-
size = 2 * strlen(tmpenv[i]) + 2; /* +2 for final \0 */
1205-
ALLOC_GROW(wenvblk, (wenvpos + size) * sizeof(wchar_t), wenvsz);
1206-
wenvpos += xutftowcs(&wenvblk[wenvpos], tmpenv[i], size) + 1;
1241+
/*
1242+
* If there is a deltaenv, let's accumulate all keys into `array`,
1243+
* sort them using the stable git_qsort() and then copy, skipping
1244+
* duplicate keys
1245+
*/
1246+
for (p = wenv; p && *p; ) {
1247+
ALLOC_GROW(array, nr + 1, alloc);
1248+
s = wcslen(p) + 1;
1249+
array[nr++] = p;
1250+
p += s;
1251+
size += s;
12071252
}
1208-
/* add final \0 terminator */
1209-
wenvblk[wenvpos] = 0;
1210-
free(tmpenv);
1211-
return wenvblk;
1253+
1254+
/* (over-)assess size needed for wchar version of deltaenv */
1255+
for (delta_size = 0, i = 0; deltaenv[i]; i++)
1256+
delta_size += strlen(deltaenv[i]) * 2 + 1;
1257+
ALLOC_ARRAY(wdeltaenv, delta_size);
1258+
1259+
/* convert the deltaenv, appending to array */
1260+
for (i = 0, p = wdeltaenv; deltaenv[i]; i++) {
1261+
ALLOC_GROW(array, nr + 1, alloc);
1262+
wlen = xutftowcs(p, deltaenv[i], wdeltaenv + delta_size - p);
1263+
array[nr++] = p;
1264+
p += wlen + 1;
1265+
}
1266+
1267+
git_qsort(array, nr, sizeof(*array), wenvcmp);
1268+
ALLOC_ARRAY(result, size + delta_size);
1269+
1270+
for (p = result, i = 0; i < nr; i++) {
1271+
/* Skip any duplicate keys; last one wins */
1272+
while (i + 1 < nr && !wenvcmp(array + i, array + i + 1))
1273+
i++;
1274+
1275+
/* Skip "to delete" entry */
1276+
if (!wcschr(array[i], L'='))
1277+
continue;
1278+
1279+
size = wcslen(array[i]) + 1;
1280+
memcpy(p, array[i], size * sizeof(*p));
1281+
p += size;
1282+
}
1283+
*p = L'\0';
1284+
1285+
free(array);
1286+
free(wdeltaenv);
1287+
FreeEnvironmentStringsW(wenv);
1288+
return result;
12121289
}
12131290

12141291
static void do_unset_environment_variables(void)
@@ -1474,87 +1551,83 @@ int mingw_kill(pid_t pid, int sig)
14741551
}
14751552

14761553
/*
1477-
* Compare environment entries by key (i.e. stopping at '=' or '\0').
1554+
* UTF-8 versions of getenv(), putenv() and unsetenv().
1555+
* Internally, they use the CRT's stock UNICODE routines
1556+
* to avoid data loss.
14781557
*/
1479-
static int compareenv(const void *v1, const void *v2)
1558+
char *mingw_getenv(const char *name)
14801559
{
1481-
const char *e1 = *(const char**)v1;
1482-
const char *e2 = *(const char**)v2;
1560+
#define GETENV_MAX_RETAIN 30
1561+
static char *values[GETENV_MAX_RETAIN];
1562+
static int value_counter;
1563+
int len_key, len_value;
1564+
wchar_t *w_key;
1565+
char *value;
1566+
wchar_t w_value[32768];
14831567

1484-
for (;;) {
1485-
int c1 = *e1++;
1486-
int c2 = *e2++;
1487-
c1 = (c1 == '=') ? 0 : tolower(c1);
1488-
c2 = (c2 == '=') ? 0 : tolower(c2);
1489-
if (c1 > c2)
1490-
return 1;
1491-
if (c1 < c2)
1492-
return -1;
1493-
if (c1 == 0)
1494-
return 0;
1495-
}
1496-
}
1568+
if (!name || !*name)
1569+
return NULL;
14971570

1498-
static int bsearchenv(char **env, const char *name, size_t size)
1499-
{
1500-
unsigned low = 0, high = size;
1501-
while (low < high) {
1502-
unsigned mid = low + ((high - low) >> 1);
1503-
int cmp = compareenv(&env[mid], &name);
1504-
if (cmp < 0)
1505-
low = mid + 1;
1506-
else if (cmp > 0)
1507-
high = mid;
1508-
else
1509-
return mid;
1571+
len_key = strlen(name) + 1;
1572+
/* We cannot use xcalloc() here because that uses getenv() itself */
1573+
w_key = calloc(len_key, sizeof(wchar_t));
1574+
if (!w_key)
1575+
die("Out of memory, (tried to allocate %u wchar_t's)", len_key);
1576+
xutftowcs(w_key, name, len_key);
1577+
len_value = GetEnvironmentVariableW(w_key, w_value, ARRAY_SIZE(w_value));
1578+
if (!len_value && GetLastError() == ERROR_ENVVAR_NOT_FOUND) {
1579+
free(w_key);
1580+
return NULL;
15101581
}
1511-
return ~low; /* not found, return 1's complement of insert position */
1582+
free(w_key);
1583+
1584+
len_value = len_value * 3 + 1;
1585+
/* We cannot use xcalloc() here because that uses getenv() itself */
1586+
value = calloc(len_value, sizeof(char));
1587+
if (!value)
1588+
die("Out of memory, (tried to allocate %u bytes)", len_value);
1589+
xwcstoutf(value, w_value, len_value);
1590+
1591+
/*
1592+
* We return `value` which is an allocated value and the caller is NOT
1593+
* expecting to have to free it, so we keep a round-robin array,
1594+
* invalidating the buffer after GETENV_MAX_RETAIN getenv() calls.
1595+
*/
1596+
free(values[value_counter]);
1597+
values[value_counter++] = value;
1598+
if (value_counter >= ARRAY_SIZE(values))
1599+
value_counter = 0;
1600+
1601+
return value;
15121602
}
15131603

1514-
/*
1515-
* If name contains '=', then sets the variable, otherwise it unsets it
1516-
* Size includes the terminating NULL. Env must have room for size + 1 entries
1517-
* (in case of insert). Returns the new size. Optionally frees removed entries.
1518-
*/
1519-
static int do_putenv(char **env, const char *name, int size, int free_old)
1604+
int mingw_putenv(const char *namevalue)
15201605
{
1521-
int i = bsearchenv(env, name, size - 1);
1606+
int size;
1607+
wchar_t *wide, *equal;
1608+
BOOL result;
15221609

1523-
/* optionally free removed / replaced entry */
1524-
if (i >= 0 && free_old)
1525-
free(env[i]);
1610+
if (!namevalue || !*namevalue)
1611+
return 0;
15261612

1527-
if (strchr(name, '=')) {
1528-
/* if new value ('key=value') is specified, insert or replace entry */
1529-
if (i < 0) {
1530-
i = ~i;
1531-
memmove(&env[i + 1], &env[i], (size - i) * sizeof(char*));
1532-
size++;
1533-
}
1534-
env[i] = (char*) name;
1535-
} else if (i >= 0) {
1536-
/* otherwise ('key') remove existing entry */
1537-
size--;
1538-
memmove(&env[i], &env[i + 1], (size - i) * sizeof(char*));
1613+
size = strlen(namevalue) * 2 + 1;
1614+
wide = calloc(size, sizeof(wchar_t));
1615+
if (!wide)
1616+
die("Out of memory, (tried to allocate %u wchar_t's)", size);
1617+
xutftowcs(wide, namevalue, size);
1618+
equal = wcschr(wide, L'=');
1619+
if (!equal)
1620+
result = SetEnvironmentVariableW(wide, NULL);
1621+
else {
1622+
*equal = L'\0';
1623+
result = SetEnvironmentVariableW(wide, equal + 1);
15391624
}
1540-
return size;
1541-
}
1625+
free(wide);
15421626

1543-
char *mingw_getenv(const char *name)
1544-
{
1545-
char *value;
1546-
int pos = bsearchenv(environ, name, environ_size - 1);
1547-
if (pos < 0)
1548-
return NULL;
1549-
value = strchr(environ[pos], '=');
1550-
return value ? &value[1] : NULL;
1551-
}
1627+
if (!result)
1628+
errno = err_win_to_posix(GetLastError());
15521629

1553-
int mingw_putenv(const char *namevalue)
1554-
{
1555-
ALLOC_GROW(environ, (environ_size + 1) * sizeof(char*), environ_alloc);
1556-
environ_size = do_putenv(environ, namevalue, environ_size, 1);
1557-
return 0;
1630+
return result ? 0 : -1;
15581631
}
15591632

15601633
/*
@@ -2462,17 +2535,6 @@ void mingw_startup(void)
24622535
maxlen = wcslen(wargv[0]);
24632536
for (i = 1; i < argc; i++)
24642537
maxlen = max(maxlen, wcslen(wargv[i]));
2465-
for (i = 0; wenv[i]; i++)
2466-
maxlen = max(maxlen, wcslen(wenv[i]));
2467-
2468-
/*
2469-
* nedmalloc can't free CRT memory, allocate resizable environment
2470-
* list. Note that xmalloc / xmemdupz etc. call getenv, so we cannot
2471-
* use it while initializing the environment itself.
2472-
*/
2473-
environ_size = i + 1;
2474-
environ_alloc = alloc_nr(environ_size * sizeof(char*));
2475-
environ = malloc_startup(environ_alloc);
24762538

24772539
/* allocate buffer (wchar_t encodes to max 3 UTF-8 bytes) */
24782540
maxlen = 3 * maxlen + 1;
@@ -2481,14 +2543,8 @@ void mingw_startup(void)
24812543
/* convert command line arguments and environment to UTF-8 */
24822544
for (i = 0; i < argc; i++)
24832545
__argv[i] = wcstoutfdup_startup(buffer, wargv[i], maxlen);
2484-
for (i = 0; wenv[i]; i++)
2485-
environ[i] = wcstoutfdup_startup(buffer, wenv[i], maxlen);
2486-
environ[i] = NULL;
24872546
free(buffer);
24882547

2489-
/* sort environment for O(log n) getenv / putenv */
2490-
qsort(environ, i, sizeof(char*), compareenv);
2491-
24922548
/* fix Windows specific environment settings */
24932549
setup_windows_environment();
24942550

compat/mingw.h

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -260,11 +260,35 @@ char *mingw_mktemp(char *template);
260260
char *mingw_getcwd(char *pointer, int len);
261261
#define getcwd mingw_getcwd
262262

263+
#ifdef NO_UNSETENV
264+
#error "NO_UNSETENV is incompatible with the Windows-specific startup code!"
265+
#endif
266+
267+
/*
268+
* We bind *env() routines (even the mingw_ ones) to private mingw_ versions.
269+
* These talk to the CRT using UNICODE/wchar_t, but maintain the original
270+
* narrow-char API.
271+
*
272+
* Note that the MSCRT maintains both ANSI (getenv()) and UNICODE (_wgetenv())
273+
* routines and stores both versions of each environment variable in parallel
274+
* (and secretly updates both when you set one or the other), but it uses CP_ACP
275+
* to do the conversion rather than CP_UTF8.
276+
*
277+
* Since everything in the git code base is UTF8, we define the mingw_ routines
278+
* to access the CRT using the UNICODE routines and manually convert them to
279+
* UTF8. This also avoids round-trip problems.
280+
*
281+
* This also helps with our linkage, since "_wenviron" is publicly exported
282+
* from the CRT. But to access "_environ" we would have to statically link
283+
* to the CRT (/MT).
284+
*
285+
* We require NO_SETENV (and let gitsetenv() call our mingw_putenv).
286+
*/
287+
#define getenv mingw_getenv
#define putenv mingw_putenv
#define unsetenv mingw_putenv
/*
 * Returns the UTF-8 value of the variable `name`, or NULL if it is not set.
 * The result must not be freed by the caller.
 */
char *mingw_getenv(const char *name);
/*
 * "KEY=VALUE" sets KEY; a bare "KEY" removes it, which is why unsetenv is
 * mapped to this function as well.  Returns 0 on success, -1 on failure.
 */
int mingw_putenv(const char *namevalue);
268292

269293
int mingw_gethostname(char *host, int namelen);
270294
#define gethostname mingw_gethostname

t/t7800-difftool.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ test_expect_success 'difftool --extcmd cat arg1' '
332332
# Run difftool with an --extcmd that cats its second argument ($2) and
# expect the output to be the single line "branch".
test_expect_success 'difftool --extcmd cat arg2' '
333333
echo branch >expect &&
334334
git difftool --no-prompt \
335-
--extcmd sh\ -c\ \"cat\ \$2\" branch >actual &&
335+
--extcmd sh\ -c\ \"cat\ \\\"\$2\\\"\" branch >actual &&
336336
test_cmp expect actual
337337
'
338338

0 commit comments

Comments
 (0)