Skip to content

Commit 4c7a030

Browse files
committed
Add Win32 Unicode <-> UTF-8 conversion functions.
These will be used by functions that need to call Unicode Win32 APIs, such as GetFullPathNameW(), which supports long paths, instead of GetFullPathNameA(), which does not, even when long paths are enabled on the system!
1 parent 47ed2d2 commit 4c7a030

File tree

3 files changed

+83
-1
lines changed

3 files changed

+83
-1
lines changed

src/util.cc

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,55 @@ void Win32Fatal(const char* function, const char* hint) {
561561
Fatal("%s: %s", function, GetLastErrorString().c_str());
562562
}
563563
}
564-
#endif
564+
565+
bool ConvertUTF8ToWin32Unicode(const std::string& input, std::wstring* output,
566+
std::string* err) {
567+
output->clear();
568+
if (input.empty())
569+
return true;
570+
571+
int int_size = static_cast<int>(input.size());
572+
if (static_cast<size_t>(int_size) != input.size()) {
573+
*err = "Input string length > INT_MAX";
574+
return false;
575+
}
576+
int wide_size =
577+
MultiByteToWideChar(CP_UTF8, 0, input.c_str(), int_size, nullptr, 0);
578+
if (wide_size <= 0) {
579+
*err = "MultiByteToWideChar(" + input + "): " + GetLastErrorString();
580+
return false;
581+
}
582+
output->resize(static_cast<size_t>(wide_size));
583+
MultiByteToWideChar(CP_UTF8, 0, input.c_str(), int_size,
584+
const_cast<wchar_t*>(output->data()), wide_size);
585+
return true;
586+
}
587+
588+
bool ConvertWin32UnicodeToUTF8(const std::wstring& input, std::string* output,
589+
std::string* err) {
590+
output->clear();
591+
if (input.empty())
592+
return true;
593+
594+
int int_size = static_cast<int>(input.size());
595+
if (int_size != input.size()) {
596+
*err = "Input string length > INT_MAX";
597+
return false;
598+
}
599+
int utf8_size = WideCharToMultiByte(CP_UTF8, 0, input.c_str(), int_size, NULL,
600+
0, NULL, NULL);
601+
if (utf8_size <= 0) {
602+
*err = "WideCharToMultiByte(" + std::string(input.begin(), input.end()) +
603+
"): " + GetLastErrorString();
604+
return false;
605+
}
606+
607+
output->resize(static_cast<size_t>(utf8_size));
608+
WideCharToMultiByte(CP_UTF8, 0, input.c_str(), int_size,
609+
const_cast<char*>(output->data()), utf8_size, NULL, NULL);
610+
return true;
611+
}
612+
#endif // _WIN32
565613

566614
bool islatinalpha(int c) {
567615
// isalpha() is locale-dependent.

src/util.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,18 @@ std::string GetLastErrorString();
124124
/// Calls Fatal() with a function name and GetLastErrorString.
125125
NORETURN void Win32Fatal(const char* function, const char* hint = NULL);
126126

127+
/// Convert UTF-8 string to Win32 Unicode.
128+
/// On success, set |*output| then return true.
129+
/// On failure, clear |*output|, set |*err| then return false.
130+
bool ConvertUTF8ToWin32Unicode(const std::string& input, std::wstring* output,
131+
std::string* err);
132+
133+
/// Convert Win32 Unicode to UTF-8 string.
134+
/// On success, set |*output| then return true.
135+
/// On failure, clear |*output|, set |*err| then return false.
136+
bool ConvertWin32UnicodeToUTF8(const std::wstring& input, std::string* output,
137+
std::string* err);
138+
127139
/// Naive implementation of C++ 20 std::bit_cast(), used to fix Clang and GCC
128140
/// [-Wcast-function-type] warning on casting result of GetProcAddress().
129141
template <class To, class From>

src/util_test.cc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,3 +502,25 @@ TEST(StripAnsiEscapeCodes, StripColors) {
502502
EXPECT_EQ("affixmgr.cxx:286:15: warning: using the result... [-Wparentheses]",
503503
stripped);
504504
}
505+
506+
#ifdef _WIN32
507+
// A wide string containing non-ASCII characters ("B\u00E9b\u00E9") must come
// back as the matching UTF-8 byte sequence, with no error reported.
TEST(ConvertWin32UnicodeToUTF8, Test) {
  const std::wstring wide_input(L"B\u00E9b\u00E9");
  const std::string expected_utf8(
      "B\xC3\xA9"
      "b\xC3\xA9");

  std::string err;
  std::string output;
  const bool converted = ConvertWin32UnicodeToUTF8(wide_input, &output, &err);

  EXPECT_TRUE(converted);
  EXPECT_TRUE(err.empty()) << err;
  EXPECT_EQ(output, expected_utf8);
}
516+
517+
// A UTF-8 byte sequence containing two-byte characters must come back as the
// matching wide string ("B\u00E9b\u00E9"), with no error reported.
TEST(ConvertUTF8ToWin32Unicode, Test) {
  const std::string utf8_input(
      "B\xC3\xA9"
      "b\xC3\xA9");
  const std::wstring expected_wide(L"B\u00E9b\u00E9");

  std::wstring output;
  std::string err;
  const bool converted = ConvertUTF8ToWin32Unicode(utf8_input, &output, &err);

  EXPECT_TRUE(converted);
  EXPECT_TRUE(err.empty()) << err;
  EXPECT_EQ(output, expected_wide);
}
526+
#endif // _WIN32

0 commit comments

Comments
 (0)