Skip to content

Commit 4b7104f

Browse files
jeffhostetlerdscho
authored andcommitted
msvc: convert environment from/to UTF-16 on the fly
This adds MSVC versions of getenv() and friends. These take UTF-8 arguments and return UTF-8 values, but use the UNICODE versions of the CRT routines. This avoids the need to write to __environ (which is only visible if you statically link to the CRT). This also avoids the CP_ACP conversions performed inside the CRT. It also avoids various memory leaks and problems. Signed-off-by: Jeff Hostetler <[email protected]>
1 parent e3d23f0 commit 4b7104f

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed

compat/mingw.c

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,6 +1217,147 @@ static char *path_lookup(const char *cmd, int exe_only)
12171217
return prog;
12181218
}
12191219

1220+
#if defined(_MSC_VER)
1221+
1222+
/* We need a stable sort */
1223+
#ifndef INTERNAL_QSORT
1224+
#include "qsort.c"
1225+
#endif
1226+
1227+
/*
 * qsort-style comparator for environment entries ("KEY=VALUE" or "KEY").
 *
 * Only the key part (everything before the first '=' or the terminating
 * NUL) takes part in the comparison, and case is ignored. Two entries
 * with the same key compare equal regardless of their values.
 */
static int wenvcmp(const void *a, const void *b)
{
	wchar_t *lhs = *(wchar_t **)a;
	wchar_t *rhs = *(wchar_t **)b;
	size_t lhs_key = 0, rhs_key = 0, common;
	int cmp;

	/* Measure both keys */
	while (lhs[lhs_key] && lhs[lhs_key] != L'=')
		lhs_key++;
	while (rhs[rhs_key] && rhs[rhs_key] != L'=')
		rhs_key++;

	/* Compare the part that both keys have in common */
	common = lhs_key < rhs_key ? lhs_key : rhs_key;
	cmp = _wcsnicmp(lhs, rhs, common);
	if (cmp)
		return cmp;

	/* Same prefix: identical keys are equal, else the shorter sorts first */
	if (lhs_key == rhs_key)
		return 0;
	return lhs_key < rhs_key ? -1 : +1;
}
1247+
1248+
/*
1249+
* Build an environment block combining the inherited environment
1250+
* merged with the given list of settings.
1251+
*
1252+
* Values of the form "KEY=VALUE" in deltaenv override inherited values.
1253+
* Values of the form "KEY" in deltaenv delete inherited values.
1254+
*
1255+
* Multiple entries in deltaenv for the same key are explicitly allowed.
1256+
*
1257+
* We return a contiguous block of UNICODE strings with a final trailing
1258+
* zero word.
1259+
*/
1260+
static wchar_t *make_environment_block(char **deltaenv)
1261+
{
1262+
/*
1263+
* The CRT (at least as of UCRT) secretly declares "_wenviron"
1264+
* as a function that returns a pointer to a mostly static table.
1265+
* Grab the pointer and cache it for the duration of our loop.
1266+
*/
1267+
const wchar_t *wenv = GetEnvironmentStringsW(), *p;
1268+
size_t delta_size = 0, size = 1; /* for extra NUL at the end */
1269+
1270+
wchar_t **array = NULL;
1271+
size_t alloc = 0, nr = 0, i;
1272+
1273+
const char *p2;
1274+
wchar_t *wdeltaenv;
1275+
1276+
wchar_t *result, *p3;
1277+
1278+
/*
1279+
* If there is no deltaenv to apply, simply return a copy
1280+
*/
1281+
if (!deltaenv || !*deltaenv) {
1282+
for (p = wenv; p && *p; ) {
1283+
size_t s = wcslen(p) + 1;
1284+
size += s;
1285+
p += s;
1286+
}
1287+
1288+
ALLOC_ARRAY(result, size);
1289+
memcpy(result, wenv, size * sizeof(*wenv));
1290+
FreeEnvironmentStringsW(wenv);
1291+
return result;
1292+
}
1293+
1294+
/*
1295+
* If there is a deltaenv, let's accumulate all keys into `array`,
1296+
* sort them using the stable git_qsort() and then copy, skipping
1297+
* duplicate keys
1298+
*/
1299+
1300+
for (p = wenv; p && *p; ) {
1301+
size_t s = wcslen(p) + 1;
1302+
size += s;
1303+
ALLOC_GROW(array, nr + 1, alloc);
1304+
array[nr++] = p;
1305+
p += s;
1306+
}
1307+
1308+
/* (over-)assess size needed for wchar version of deltaenv */
1309+
for (i = 0; deltaenv[i]; i++) {
1310+
size_t s = strlen(deltaenv[i]) + 1;
1311+
delta_size += s;
1312+
}
1313+
1314+
ALLOC_ARRAY(wdeltaenv, delta_size);
1315+
1316+
/* convert the deltaenv, appending to array */
1317+
for (i = 0, p3 = wdeltaenv; deltaenv[i]; i++) {
1318+
size_t s = strlen(deltaenv[i]) + 1, wlen;
1319+
wlen = xutftowcs(p3, deltaenv[i], s * 2);
1320+
1321+
ALLOC_GROW(array, nr + 1, alloc);
1322+
array[nr++] = p3;
1323+
1324+
p3 += wlen + 1;
1325+
}
1326+
1327+
git_qsort(array, nr, sizeof(*array), wenvcmp);
1328+
ALLOC_ARRAY(result, size + delta_size);
1329+
1330+
for (p3 = result, i = 0; i < nr; i++) {
1331+
wchar_t *equal = wcschr(array[i], L'=');;
1332+
1333+
/* Skip "to delete" entry */
1334+
if (!equal)
1335+
continue;
1336+
1337+
p = array[i];
1338+
1339+
/* Skip any duplicate */
1340+
if (i + 1 < nr) {
1341+
wchar_t *next = array[i + 1];
1342+
size_t n = equal - p;
1343+
1344+
if (!_wcsnicmp(p, next, n) && (!next[n] || next[n] == L'='))
1345+
continue;
1346+
}
1347+
1348+
size = wcslen(p) + 1;
1349+
memcpy(p3, p, size * sizeof(*p));
1350+
p3 += size;
1351+
}
1352+
*p3 = L'\0';
1353+
1354+
free(array);
1355+
FreeEnvironmentStringsW(wenv);
1356+
return result;
1357+
}
1358+
1359+
#else
1360+
12201361
static int do_putenv(char **env, const char *name, int size, int free_old);
12211362

12221363
/* used number of elements of environ array, including terminating NULL */
@@ -1263,6 +1404,7 @@ static wchar_t *make_environment_block(char **deltaenv)
12631404
free(tmpenv);
12641405
return wenvblk;
12651406
}
1407+
#endif
12661408

12671409
static void do_unset_environment_variables(void)
12681410
{
@@ -1553,6 +1695,70 @@ int mingw_kill(pid_t pid, int sig)
15531695
return -1;
15541696
}
15551697

1698+
#if defined(_MSC_VER)
1699+
1700+
/* UTF8 versions of getenv and putenv (and unsetenv).
1701+
* Internally, they use the CRT's stock UNICODE routines
1702+
* to avoid data loss.
1703+
*
1704+
* Unlike the mingw version, we DO NOT directly write to
1705+
* the CRT variables. We also DO NOT try to manage/replace
1706+
* the CRT storage.
1707+
*/
1708+
/*
 * UTF-8 flavour of getenv().
 *
 * Converts `name` to wide characters, looks it up with _wgetenv() and
 * converts the value back to UTF-8.
 *
 * Returns NULL if `name` is NULL or empty, if the variable is not set,
 * or if a conversion or an allocation fails. Otherwise returns a freshly
 * allocated NUL-terminated UTF-8 string.
 */
char *msc_getenv(const char *name)
{
	size_t len_key, len_value;
	wchar_t *w_key;
	char *value;
	const wchar_t *w_value;

	if (!name || !*name)
		return NULL;

	len_key = strlen(name) + 1;
	w_key = calloc(len_key, sizeof(wchar_t));
	if (!w_key)
		return NULL;

	/* do not look up a truncated key if the conversion failed */
	if (xutftowcs(w_key, name, len_key) < 0) {
		free(w_key);
		return NULL;
	}
	w_value = _wgetenv(w_key);
	free(w_key);

	if (!w_value)
		return NULL;

	/* room for up to three bytes per wide character, plus the NUL */
	len_value = wcslen(w_value) * 3 + 1;
	value = calloc(len_value, sizeof(char));
	if (!value)
		return NULL;

	if (xwcstoutf(value, w_value, len_value) < 0) {
		free(value);
		return NULL;
	}

	/* TODO Warning: We return "value" which is an allocated
	 * value and the caller is NOT expecting to have to free
	 * it, so we leak memory.
	 */
	return value;
}
1737+
1738+
/*
 * UTF-8 flavour of putenv() that also serves as unsetenv().
 *
 * `name` is either "KEY=VALUE" (set) or a bare "KEY" (unset). A bare key
 * gets a '=' appended before it is handed to _wputenv().
 *
 * Returns 0 without doing anything if `name` is NULL or empty, -1 if an
 * allocation or the UTF-8 conversion fails, and otherwise whatever
 * _wputenv() returns. The temporary wide string is always released.
 */
int msc_putenv(const char *name)
{
	size_t len;
	int result;
	const char *equal;
	wchar_t *wide;

	if (!name || !*name)
		return 0;

	len = strlen(name);
	equal = strchr(name, '=');

	/* one extra slot for the NUL, one more for '=' when it is missing */
	wide = calloc(len + 1 + !equal, sizeof(wchar_t));
	if (!wide)
		return -1;

	/* never pass a truncated string on to the CRT */
	if (xutftowcs(wide, name, len + 1) < 0) {
		free(wide);
		return -1;
	}
	if (!equal)
		wcscat(wide, L"=");

	result = _wputenv(wide);

	free(wide);
	return result;
}
1759+
1760+
#else
1761+
15561762
/*
15571763
* Compare environment entries by key (i.e. stopping at '=' or '\0').
15581764
*/
@@ -1681,6 +1887,8 @@ int mingw_putenv(const char *namevalue)
16811887
return 0;
16821888
}
16831889

1890+
#endif
1891+
16841892
/*
16851893
* Note, this isn't a complete replacement for getaddrinfo. It assumes
16861894
* that service contains a numerical port, or that it is null. It

compat/mingw.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,12 +265,53 @@ char *mingw_getcwd(char *pointer, int len);
265265
#error "NO_UNSETENV is incompatible with the MinGW startup code!"
266266
#endif
267267

268+
#if defined(_MSC_VER)
269+
/*
270+
* We bind *env() routines (even the mingw_ ones) to private msc_ versions.
271+
* These talk to the CRT using UNICODE/wchar_t, but maintain the original
272+
* narrow-char API.
273+
*
274+
* Note that the MSCRT maintains both ANSI (getenv()) and UNICODE (_wgetenv())
275+
* routines and stores both versions of each environment variable in parallel
276+
* (and secretly updates both when you set one or the other), but it uses CP_ACP
277+
* to do the conversion rather than CP_UTF8.
278+
*
279+
* Since everything in the git code base is UTF8, we define the msc_ routines
280+
* to access the CRT using the UNICODE routines and manually convert them to
281+
* UTF8. This also avoids round-trip problems.
282+
*
283+
* This also helps with our linkage, since "_wenviron" is publicly exported
284+
* from the CRT. But to access "_environ" we would have to statically link
285+
* to the CRT (/MT).
286+
*
287+
* We also use "wmain(argc,argv,env)", so the CRT does the initial UNICODE setup for us.
288+
* This avoids the need for the msc_startup() to import and convert the
289+
* inherited environment.
290+
*
291+
* We require NO_SETENV (and let gitsetenv() call our msc_putenv).
292+
*/
293+
/*
 * Redirect the environment accessors, including the mingw_ spellings,
 * to the two msc_ routines declared below.
 */
#define getenv msc_getenv
294+
#define putenv msc_putenv
295+
/* unsetenv(name) turns into msc_putenv(name), i.e. a key without '=' */
#define unsetenv msc_putenv
296+
#define mingw_getenv msc_getenv
297+
#define mingw_putenv msc_putenv
298+
/* Takes a UTF-8 variable name; returns its UTF-8 value, or NULL */
char *msc_getenv(const char *name);
299+
/* Takes UTF-8 "KEY=VALUE" to set a variable, or a bare "KEY" to unset it */
int msc_putenv(const char *name);
300+
301+
#ifndef NO_SETENV
302+
#error "NO_SETENV is required for MSC startup code!"
303+
#endif
304+
305+
#else
306+
268307
char *mingw_getenv(const char *name);
269308
#define getenv mingw_getenv
270309
int mingw_putenv(const char *namevalue);
271310
#define putenv mingw_putenv
272311
#define unsetenv mingw_putenv
273312

313+
#endif
314+
274315
int mingw_gethostname(char *host, int namelen);
275316
#define gethostname mingw_gethostname
276317

0 commit comments

Comments
 (0)