Skip to content

Commit b252372

Browse files
hodoulp and itsmattkc authored
Fixes Unicode paths on Windows (#1363) (#1527)
* Fixes Unicode paths on Windows

Unlike most other platforms, Windows' Unicode is standardized around UTF-16, an encoding not compatible with "char *" arrays common in C/C++. As such, to support Unicode correctly when using Win32 APIs, strings must be converted to and from UTF-16 and the Unicode versions of the APIs must be used over the ANSI versions.

This commit introduces the following:

- Utility functions for converting between UTF-8 and UTF-16LE on all platforms:
  - Platform::Utf8ToUtf16
  - Platform::Utf16ToUtf8
- Adds tests for these conversion functions to ensure the conversion to and from UTF-8 and UTF-16LE is correct.
- Utility wrappers for "std::ifstream" that automatically convert to and from UTF-16 so that filenames requiring Unicode encoding function correctly:
  - Platform::CreateInputFileStream
  - Platform::OpenInputFileStream
- Moves the default file hash function to the Platform class (Platform::CreateFileContentHash) and switches to the Win32 UTF-16 variant on Windows.
- Adds the "UNICODE" macro before including "Windows.h" which ensures all functions called are the Unicode variants instead of the default ANSI variants.
  - Implicitly changes functions such as GetEnvironmentVariable and SetEnvironmentVariable to their Unicode variants.
- Changes the following environment variable related functions to their Win32 Unicode variants on Windows:
  - environ -> _wenviron
  - _putenv_s -> _wputenv_s
- Updates tests using SetEnvironmentVariable to use wide string literals since that function has been switched to the Unicode variant.
Signed-off-by: itsmattkc <[email protected]> * Moved UNICODE and _UNICODE definitions to CMake Signed-off-by: itsmattkc <[email protected]> * handle empty strings and use const references Signed-off-by: itsmattkc <[email protected]> * only use wenviron when unicode is enabled Signed-off-by: itsmattkc <[email protected]> * add ANSI Win32 version of Getenv Signed-off-by: itsmattkc <[email protected]> * Add CMake option for compiling with Win32 Unicode support Signed-off-by: itsmattkc <[email protected]> * Throw exception if UTF functions are called on non-Windows platforms Signed-off-by: itsmattkc <[email protected]> * Throw OCIO Exception Signed-off-by: itsmattkc <[email protected]> * Don't run UTF-8/16 conversion test on non-Windows Signed-off-by: itsmattkc <[email protected]> * Fix CMake error Signed-off-by: itsmattkc <[email protected]> * minor CMake adjustment Signed-off-by: itsmattkc <[email protected]> * Clarified CMake option for Win32 unicode Signed-off-by: itsmattkc <[email protected]> * minor improvements Signed-off-by: itsmattkc <[email protected]> * fix macro in cpu test cmake Signed-off-by: itsmattkc <[email protected]> Co-authored-by: itsmattkc <[email protected]>
1 parent 17e52e3 commit b252372

File tree

14 files changed

+328
-83
lines changed

14 files changed

+328
-83
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ option(OCIO_BUILD_JAVA "Specify whether to build java bindings" OFF)
139139

140140
option(OCIO_WARNING_AS_ERROR "Set build error level for CI testing" OFF)
141141

142+
option(OCIO_USE_WINDOWS_UNICODE "On Windows only, compile with Unicode support" WIN32)
143+
142144

143145
###############################################################################
144146
# Optimization / internal linking preferences

src/OpenColorIO/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,15 @@ if(WIN32)
280280
PRIVATE
281281
XML_STATIC
282282
)
283+
284+
if (OCIO_USE_WINDOWS_UNICODE)
285+
# Add Unicode definitions to use Unicode functions
286+
target_compile_definitions(OpenColorIO
287+
PRIVATE
288+
UNICODE
289+
_UNICODE
290+
)
291+
endif()
283292
endif()
284293

285294
set_target_properties(OpenColorIO PROPERTIES

src/OpenColorIO/Config.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1117,7 +1117,7 @@ ConstConfigRcPtr Config::CreateFromFile(const char * filename)
11171117
throw ExceptionMissingFile ("The config filepath is missing.");
11181118
}
11191119

1120-
std::ifstream istream(filename);
1120+
std::ifstream istream = Platform::CreateInputFileStream(filename, std::ios_base::in);
11211121
if (istream.fail())
11221122
{
11231123
std::ostringstream os;

src/OpenColorIO/ContextVariableUtils.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include "ContextVariableUtils.h"
88
#include "utils/StringUtils.h"
9+
#include "Platform.h"
910

1011

1112
#if defined(__APPLE__) && !defined(__IPHONE__)
@@ -19,6 +20,12 @@ extern char ** environ;
1920
namespace
2021
{
2122

23+
#if defined(_WIN32) && defined(UNICODE)
24+
inline wchar_t ** GetEnviron()
25+
{
26+
return _wenviron;
27+
}
28+
#else
2229
inline char ** GetEnviron()
2330
{
2431
#if __IPHONE__
@@ -30,6 +37,7 @@ inline char ** GetEnviron()
3037
return environ;
3138
#endif
3239
}
40+
#endif
3341

3442
} // anon.
3543

@@ -71,11 +79,28 @@ void LoadEnvironment(EnvMap & map, bool update)
7179
{
7280
// First, add or update the context variables with existing env. variables.
7381

82+
#if defined(_WIN32) && defined(UNICODE)
83+
if (GetEnviron() == NULL) {
84+
// If the program starts with "main" instead of "wmain", then wenviron returns NULL until
85+
// the first call to either wgetenv or wputenv. Calling wgetenv, even with an empty
86+
// variable name, will populate wenviron correctly. We also use wgetenv_s (which requires
87+
// a valid size pointer) to suppress safety warnings about wgetenv during the compile.
88+
size_t sz;
89+
_wgetenv_s(&sz, NULL, 0, L"");
90+
}
91+
92+
for (wchar_t **env = GetEnviron(); *env != NULL; ++env)
93+
{
94+
// Split environment up into std::map[name] = value.
95+
96+
const std::string env_str = Platform::Utf16ToUtf8((wchar_t*)*env);
97+
#else
7498
for (char **env = GetEnviron(); *env != NULL; ++env)
7599
{
76100
// Split environment up into std::map[name] = value.
77101

78102
const std::string env_str = (char*)*env;
103+
#endif
79104
const int pos = static_cast<int>(env_str.find_first_of('='));
80105

81106
const std::string name = env_str.substr(0, pos);

src/OpenColorIO/PathUtils.cpp

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44

55
#include <iostream>
66
#include <map>
7-
#include <sys/stat.h>
87

98
#include <OpenColorIO/OpenColorIO.h>
109

1110
#include "Mutex.h"
1211
#include "PathUtils.h"
12+
#include "Platform.h"
1313
#include "pystring/pystring.h"
1414
#include "utils/StringUtils.h"
1515

@@ -26,39 +26,9 @@ namespace OCIO_NAMESPACE
2626
{
2727
namespace
2828
{
29-
// Here is the explanation of the stat() method:
30-
// https://pubs.opengroup.org/onlinepubs/009695299/basedefs/sys/stat.h.html
31-
// "The st_ino and st_dev fields taken together uniquely identify the file within the system."
32-
//
33-
// However there are limitations to the stat() support on some Windows file systems:
34-
// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-functions?redirectedfrom=MSDN&view=vs-2019
35-
// "The inode, and therefore st_ino, has no meaning in the FAT, HPFS, or NTFS file systems."
36-
37-
// That's the default hash method implementation to compute a hash key based on a file content.
38-
std::string DefaultComputeHash(const std::string &filename)
39-
{
40-
struct stat fileInfo;
41-
if (stat(filename.c_str(), &fileInfo) == 0)
42-
{
43-
// Treat the st_dev (i.e. device) + st_ino (i.e. inode) as a proxy for the contents.
44-
45-
std::ostringstream fasthash;
46-
fasthash << fileInfo.st_dev << ":";
47-
#ifdef _WIN32
48-
// TODO: The hard-linked files are then not correctly supported on Windows platforms.
49-
fasthash << std::hash<std::string>{}(filename);
50-
#else
51-
fasthash << fileInfo.st_ino;
52-
#endif
53-
return fasthash.str();
54-
}
55-
56-
return "";
57-
}
58-
5929
// The global variable holds the hash function to use.
6030
// It could be changed using SetComputeHashFunction() to customize the implementation.
61-
ComputeHashFunction g_hashFunction = DefaultComputeHash;
31+
ComputeHashFunction g_hashFunction = Platform::CreateFileContentHash;
6232

6333
// We mutex both the main map and each item individually, so that
6434
// the potentially slow stat calls don't block other lookups to already
@@ -86,7 +56,7 @@ void SetComputeHashFunction(ComputeHashFunction hashFunction)
8656

8757
void ResetComputeHashFunction()
8858
{
89-
g_hashFunction = DefaultComputeHash;
59+
g_hashFunction = Platform::CreateFileContentHash;
9060
}
9161

9262
std::string GetFastFileHash(const std::string & filename)

src/OpenColorIO/Platform.cpp

Lines changed: 129 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
// SPDX-License-Identifier: BSD-3-Clause
22
// Copyright Contributors to the OpenColorIO Project.
33

4+
#include <codecvt>
5+
#include <locale>
46
#include <random>
57
#include <sstream>
8+
#include <sys/stat.h>
69
#include <vector>
710

811
#include <OpenColorIO/OpenColorIO.h>
@@ -49,12 +52,36 @@ bool Getenv(const char * name, std::string & value)
4952
return false;
5053
}
5154

52-
#ifdef _WIN32
53-
if(uint32_t size = GetEnvironmentVariable(name, nullptr, 0))
55+
#if defined(_WIN32)
56+
// Define working strings, converting to UTF-16 if necessary
57+
#ifdef UNICODE
58+
std::wstring name_str = Utf8ToUtf16(name);
59+
std::wstring value_str;
60+
#else
61+
std::string name_str = name;
62+
std::string value_str;
63+
#endif
64+
65+
if(uint32_t size = GetEnvironmentVariable(name_str.c_str(), nullptr, 0))
5466
{
55-
std::vector<char> buffer(size);
56-
GetEnvironmentVariable(name, buffer.data(), size);
57-
value = std::string(buffer.data());
67+
value_str.resize(size);
68+
69+
GetEnvironmentVariable(name_str.c_str(), &value_str[0], size);
70+
71+
// GetEnvironmentVariable is designed for raw pointer strings and therefore requires that
72+
// the destination buffer be long enough to place a null terminator at the end of it. Since
73+
// we're using std::wstrings here, the null terminator is unnecessary (and causes false
74+
// negatives in unit tests since the extra character makes it "non-equal" to normally
75+
// defined std::wstrings). Therefore, we pop the last character off (the null terminator)
76+
// to ensure that the string conforms to expectations.
77+
value_str.pop_back();
78+
79+
// Return value, converting to UTF-8 if necessary
80+
#ifdef UNICODE
81+
value = Utf16ToUtf8(value_str);
82+
#else
83+
value = value_str;
84+
#endif
5885
return true;
5986
}
6087
else
@@ -81,7 +108,13 @@ void Setenv(const char * name, const std::string & value)
81108
// exists. To avoid the ambiguity, use Unsetenv() when the env. variable removal is needed.
82109

83110
#ifdef _WIN32
111+
112+
#ifdef UNICODE
113+
_wputenv_s(Utf8ToUtf16(name).c_str(), Utf8ToUtf16(value).c_str());
114+
#else
84115
_putenv_s(name, value.c_str());
116+
#endif
117+
85118
#else
86119
::setenv(name, value.c_str(), 1);
87120
#endif
@@ -95,8 +128,14 @@ void Unsetenv(const char * name)
95128
}
96129

97130
#ifdef _WIN32
131+
132+
#ifdef UNICODE
98133
// Note that the Windows _putenv_s() removes the env. variable if the value is empty.
134+
_wputenv_s(Utf8ToUtf16(name).c_str(), L"");
135+
#else
99136
_putenv_s(name, "");
137+
#endif
138+
100139
#else
101140
::unsetenv(name);
102141
#endif
@@ -203,6 +242,91 @@ std::string CreateTempFilename(const std::string & filenameExt)
203242
return filename;
204243
}
205244

245+
std::ifstream CreateInputFileStream(const char * filename, std::ios_base::openmode mode)
246+
{
247+
#if defined(_WIN32) && defined(UNICODE)
248+
return std::ifstream(Utf8ToUtf16(filename).c_str(), mode);
249+
#else
250+
return std::ifstream(filename, mode);
251+
#endif
252+
}
253+
254+
void OpenInputFileStream(std::ifstream & stream, const char * filename, std::ios_base::openmode mode)
255+
{
256+
#if defined(_WIN32) && defined(UNICODE)
257+
stream.open(Utf8ToUtf16(filename).c_str(), mode);
258+
#else
259+
stream.open(filename, mode);
260+
#endif
261+
}
262+
263+
std::wstring Utf8ToUtf16(const std::string & str)
264+
{
265+
if (str.empty()) {
266+
return std::wstring();
267+
}
268+
269+
#ifdef _WIN32
270+
int sz = MultiByteToWideChar(CP_UTF8, 0, &str[0], (int)str.size(), NULL, 0);
271+
std::wstring wstr(sz, 0);
272+
MultiByteToWideChar(CP_UTF8, 0, &str[0], (int)str.size(), &wstr[0], sz);
273+
return wstr;
274+
#else
275+
throw Exception("Only supported by the Windows platform.");
276+
#endif
277+
}
278+
279+
std::string Utf16ToUtf8(const std::wstring & wstr)
280+
{
281+
if (wstr.empty()) {
282+
return std::string();
283+
}
284+
285+
#ifdef _WIN32
286+
int sz = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
287+
std::string str(sz, 0);
288+
WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &str[0], sz, NULL, NULL);
289+
return str;
290+
#else
291+
throw Exception("Only supported by the Windows platform.");
292+
#endif
293+
}
294+
295+
// Here is the explanation of the stat() method:
296+
// https://pubs.opengroup.org/onlinepubs/009695299/basedefs/sys/stat.h.html
297+
// "The st_ino and st_dev fields taken together uniquely identify the file within the system."
298+
//
299+
// However there are limitations to the stat() support on some Windows file systems:
300+
// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-functions?redirectedfrom=MSDN&view=vs-2019
301+
// "The inode, and therefore st_ino, has no meaning in the FAT, HPFS, or NTFS file systems."
302+
303+
// That's the default hash method implementation to compute a hash key based on a file content.
304+
std::string CreateFileContentHash(const std::string &filename)
305+
{
306+
#if defined(_WIN32) && defined(UNICODE)
307+
struct _stat fileInfo;
308+
if (_wstat(Platform::Utf8ToUtf16(filename).c_str(), &fileInfo) == 0)
309+
#else
310+
struct stat fileInfo;
311+
if (stat(filename.c_str(), &fileInfo) == 0)
312+
#endif
313+
{
314+
// Treat the st_dev (i.e. device) + st_ino (i.e. inode) as a proxy for the contents.
315+
316+
std::ostringstream fasthash;
317+
fasthash << fileInfo.st_dev << ":";
318+
#ifdef _WIN32
319+
// TODO: The hard-linked files are then not correctly supported on Windows platforms.
320+
fasthash << std::hash<std::string>{}(filename);
321+
#else
322+
fasthash << fileInfo.st_ino;
323+
#endif
324+
return fasthash.str();
325+
}
326+
327+
return "";
328+
}
329+
206330

207331

208332
} // Platform

src/OpenColorIO/Platform.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#endif // _WIN32
2121

2222

23+
#include <fstream>
2324
#include <string>
2425

2526

@@ -28,6 +29,20 @@
2829

2930
#define sscanf sscanf_s
3031

32+
// Define std::tstring as a wstring when Unicode is enabled and a regular string otherwise
33+
namespace std
34+
{
35+
#ifdef _UNICODE
36+
typedef wstring tstring;
37+
typedef wostringstream tostringstream;
38+
#define LogDebugT(x) LogDebug(Platform::Utf16ToUtf8(x))
39+
#else
40+
typedef string tstring;
41+
typedef ostringstream tostringstream;
42+
#define LogDebugT(x) LogDebug(x)
43+
#endif
44+
}
45+
3146
#endif // _WIN32
3247

3348

@@ -73,6 +88,21 @@ void AlignedFree(void * memBlock);
7388
// the file if created.
7489
std::string CreateTempFilename(const std::string & filenameExt);
7590

91+
// Create an input file stream (std::ifstream) using a UTF-8 filename on any platform.
92+
std::ifstream CreateInputFileStream(const char * filename, std::ios_base::openmode mode);
93+
94+
// Open an input file stream (std::ifstream) using a UTF-8 filename on any platform.
95+
void OpenInputFileStream(std::ifstream & stream, const char * filename, std::ios_base::openmode mode);
96+
97+
// Create a unique hash of a file provided as a UTF-8 filename on any platform.
98+
std::string CreateFileContentHash(const std::string &filename);
99+
100+
// Convert UTF-8 string to UTF-16LE.
101+
std::wstring Utf8ToUtf16(const std::string & str);
102+
103+
// Convert UTF-16LE string to UTF-8.
104+
std::string Utf16ToUtf8(const std::wstring & str);
105+
76106
}
77107

78108
} // namespace OCIO_NAMESPACE

0 commit comments

Comments
 (0)