From 44fb8b2fce1c6837ae4641dd02940546c5eb11ea Mon Sep 17 00:00:00 2001 From: Aras Pranckevicius Date: Sun, 1 Dec 2024 18:22:01 +0200 Subject: [PATCH] perf: faster utf8<->utf16 conversion on Windows OIIO 2.3.13 with PR #3307 changed MultiByteToWideChar/WideCharToMultiByte usage to C++11 codecvt functionality, but that has two issues: 1) it is *way* slower, primarily due to locale object access (on the Visual C++ STL implementation in VS2022 at least). Since the primary use case of these conversions is on Windows, maybe it is better to use a fast code path. 2) the whole codecvt machinery is deprecated as of C++17 across the board, and will be removed in C++26. I've kept the existing functions in there since otherwise it would have been an API break, but really maybe with OIIO they should have been un-exposed. Too late now though :( Performance numbers: doing ImageInput::create() on 1138 files that are not images at all (so OIIO in turn tries all the input plugins on them). Ryzen 5950X, VS2022, Windows: - utf8_to_utf16 3851ms -> 21ms - utf16_to_utf8 1055ms -> 4ms Signed-off-by: Aras Pranckevicius --- src/libutil/strutil.cpp | 43 ++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/src/libutil/strutil.cpp b/src/libutil/strutil.cpp index e240b24ce9..2b68ed8c1f 100644 --- a/src/libutil/strutil.cpp +++ b/src/libutil/strutil.cpp @@ -29,6 +29,9 @@ OIIO_PRAGMA_WARNING_POP #if defined(__APPLE__) || defined(__FreeBSD__) # include #endif +#ifdef _WIN32 +# include +#endif #include #include @@ -961,6 +964,17 @@ Strutil::replace(string_view str, string_view pattern, string_view replacement, std::wstring Strutil::utf8_to_utf16wstring(string_view str) noexcept { +#ifdef _WIN32 + // UTF8<->UTF16 conversions are primarily needed on Windows, so use the + // fastest option (C++11 is many times slower due to locale + // access overhead, and is deprecated starting with C++17).
+ std::wstring result; + result.resize( + MultiByteToWideChar(CP_UTF8, 0, str.data(), str.length(), NULL, 0)); + MultiByteToWideChar(CP_UTF8, 0, str.data(), str.length(), result.data(), + (int)result.size()); + return result; +#else try { OIIO_PRAGMA_WARNING_PUSH OIIO_CLANG_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") @@ -970,6 +984,7 @@ Strutil::utf8_to_utf16wstring(string_view str) noexcept } catch (const std::exception&) { return std::wstring(); } +#endif } @@ -977,6 +992,17 @@ Strutil::utf8_to_utf16wstring(string_view str) noexcept std::string Strutil::utf16_to_utf8(const std::wstring& str) noexcept { +#ifdef _WIN32 + // UTF8<->UTF16 conversions are primarily needed on Windows, so use the + // fastest option (C++11 is many times slower due to locale + // access overhead, and is deprecated starting with C++17). + std::string result; + result.resize(WideCharToMultiByte(CP_UTF8, 0, str.data(), str.length(), + NULL, 0, NULL, NULL)); + WideCharToMultiByte(CP_UTF8, 0, str.data(), str.length(), &result[0], + (int)result.size(), NULL, NULL); + return result; +#else try { OIIO_PRAGMA_WARNING_PUSH OIIO_CLANG_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") @@ -986,6 +1012,7 @@ Strutil::utf16_to_utf8(const std::wstring& str) noexcept } catch (const std::exception&) { return std::string(); } +#endif } @@ -993,22 +1020,24 @@ Strutil::utf16_to_utf8(const std::wstring& str) noexcept std::string Strutil::utf16_to_utf8(const std::u16string& str) noexcept { +#ifdef _WIN32 + std::string result; + result.resize(WideCharToMultiByte(CP_UTF8, 0, (const WCHAR*)str.data(), + str.length(), NULL, 0, NULL, NULL)); + WideCharToMultiByte(CP_UTF8, 0, (const WCHAR*)str.data(), str.length(), + &result[0], (int)result.size(), NULL, NULL); + return result; +#else try { OIIO_PRAGMA_WARNING_PUSH OIIO_CLANG_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") - // There is a bug in MSVS 2017 causing an unresolved symbol if char16_t is used (see 
https://stackoverflow.com/a/35103224) -#if defined _MSC_VER && _MSC_VER >= 1900 && _MSC_VER < 1930 - std::wstring_convert, int16_t> convert; - auto p = reinterpret_cast(str.data()); - return convert.to_bytes(p, p + str.size()); -#else std::wstring_convert, char16_t> conv; return conv.to_bytes(str); -#endif OIIO_PRAGMA_WARNING_POP } catch (const std::exception&) { return std::string(); } +#endif }